synbo 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synbo-0.1.0/LICENSE +21 -0
- synbo-0.1.0/PKG-INFO +365 -0
- synbo-0.1.0/README.md +286 -0
- synbo-0.1.0/pyproject.toml +143 -0
- synbo-0.1.0/setup.cfg +4 -0
- synbo-0.1.0/src/synbo/__init__.py +22 -0
- synbo-0.1.0/src/synbo/algorithm/acq_function.py +337 -0
- synbo-0.1.0/src/synbo/algorithm/bo_core.py +257 -0
- synbo-0.1.0/src/synbo/algorithm/evolution.py +264 -0
- synbo-0.1.0/src/synbo/algorithm/ofat.py +3 -0
- synbo-0.1.0/src/synbo/algorithm/particle_swarm.py +283 -0
- synbo-0.1.0/src/synbo/algorithm/random_select.py +37 -0
- synbo-0.1.0/src/synbo/algorithm/sg_model.py +392 -0
- synbo-0.1.0/src/synbo/analysis/__init__.py +9 -0
- synbo-0.1.0/src/synbo/analysis/llm_analyzer.py +300 -0
- synbo-0.1.0/src/synbo/descriptor/__init__.py +4 -0
- synbo-0.1.0/src/synbo/descriptor/desc_proc.py +356 -0
- synbo-0.1.0/src/synbo/descriptor/qm_desc.py +44 -0
- synbo-0.1.0/src/synbo/descriptor/spoc_desc.py +197 -0
- synbo-0.1.0/src/synbo/initialize.py +195 -0
- synbo-0.1.0/src/synbo/optimize.py +78 -0
- synbo-0.1.0/src/synbo/synbo.py +545 -0
- synbo-0.1.0/src/synbo/utils/__init__.py +8 -0
- synbo-0.1.0/src/synbo/utils/export_data.py +183 -0
- synbo-0.1.0/src/synbo/utils/hv_calculator.py +191 -0
- synbo-0.1.0/src/synbo/utils/load_data.py +97 -0
- synbo-0.1.0/src/synbo/utils/logger.py +44 -0
- synbo-0.1.0/src/synbo/utils/util_func.py +189 -0
- synbo-0.1.0/src/synbo/utils/write_excel.py +364 -0
- synbo-0.1.0/src/synbo.egg-info/PKG-INFO +365 -0
- synbo-0.1.0/src/synbo.egg-info/SOURCES.txt +40 -0
- synbo-0.1.0/src/synbo.egg-info/dependency_links.txt +1 -0
- synbo-0.1.0/src/synbo.egg-info/entry_points.txt +2 -0
- synbo-0.1.0/src/synbo.egg-info/requires.txt +38 -0
- synbo-0.1.0/src/synbo.egg-info/top_level.txt +1 -0
- synbo-0.1.0/tests/test_excel_output.py +202 -0
- synbo-0.1.0/tests/test_optimization.py +76 -0
- synbo-0.1.0/tests/test_pipeline_HVI.py +70 -0
- synbo-0.1.0/tests/test_prohibited_reagents_tracking.py +226 -0
- synbo-0.1.0/tests/test_resave.py +67 -0
- synbo-0.1.0/tests/test_single.py +172 -0
- synbo-0.1.0/tests/test_spoc.py +29 -0
synbo-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 ReactionOpt
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
synbo-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: synbo
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A multi-objective reaction optimization framework based on Bayesian Optimization
|
|
5
|
+
Author-email: Zhenzhi Tan <tanzhenzhi8@gmail.com>
|
|
6
|
+
Maintainer-email: Zhenzhi Tan <tanzhenzhi8@gmail.com>
|
|
7
|
+
License: MIT License
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2025 ReactionOpt
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in all
|
|
19
|
+
copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
|
+
SOFTWARE.
|
|
28
|
+
Project-URL: Bug Tracker, https://github.com/yourusername/reactionopt/issues
|
|
29
|
+
Project-URL: Documentation, https://reactionopt.readthedocs.io
|
|
30
|
+
Project-URL: Homepage, https://github.com/yourusername/reactionopt
|
|
31
|
+
Project-URL: Repository, https://github.com/yourusername/reactionopt
|
|
32
|
+
Keywords: chemistry,reaction optimization,bayesian optimization,machine learning,catalysis
|
|
33
|
+
Classifier: Development Status :: 3 - Alpha
|
|
34
|
+
Classifier: Intended Audience :: Science/Research
|
|
35
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
36
|
+
Classifier: Operating System :: OS Independent
|
|
37
|
+
Classifier: Programming Language :: Python :: 3
|
|
38
|
+
Classifier: Topic :: Scientific/Engineering :: Chemistry
|
|
39
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
40
|
+
Requires-Python: >=3.10
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
License-File: LICENSE
|
|
43
|
+
Requires-Dist: numpy>=1.20.0
|
|
44
|
+
Requires-Dist: pandas>=1.3.0
|
|
45
|
+
Requires-Dist: scikit-learn>=1.0.0
|
|
46
|
+
Requires-Dist: matplotlib>=3.3.0
|
|
47
|
+
Requires-Dist: seaborn>=0.11.0
|
|
48
|
+
Requires-Dist: openpyxl>=3.0.0
|
|
49
|
+
Requires-Dist: tqdm>=4.60.0
|
|
50
|
+
Requires-Dist: rich>=13.0.0
|
|
51
|
+
Requires-Dist: typer>=0.9.0
|
|
52
|
+
Requires-Dist: pydantic>=2.0.0
|
|
53
|
+
Requires-Dist: torch>=2.8.0
|
|
54
|
+
Requires-Dist: botorch>=0.15.0
|
|
55
|
+
Requires-Dist: ax-platform>=0.2.0
|
|
56
|
+
Requires-Dist: rdkit>=2024.9.1
|
|
57
|
+
Requires-Dist: pyDOE>=0.3.8
|
|
58
|
+
Requires-Dist: lxml>=6.0.2
|
|
59
|
+
Requires-Dist: epam.indigo>=1.36.1
|
|
60
|
+
Requires-Dist: openai>=2.20.0
|
|
61
|
+
Requires-Dist: tabulate>=0.10.0
|
|
62
|
+
Requires-Dist: qspoc>=0.5.0
|
|
63
|
+
Provides-Extra: dev
|
|
64
|
+
Requires-Dist: pytest>=6.0; extra == "dev"
|
|
65
|
+
Requires-Dist: pytest-cov>=2.0; extra == "dev"
|
|
66
|
+
Requires-Dist: black>=21.0; extra == "dev"
|
|
67
|
+
Requires-Dist: flake8>=3.8; extra == "dev"
|
|
68
|
+
Requires-Dist: mypy>=0.800; extra == "dev"
|
|
69
|
+
Requires-Dist: pre-commit>=2.10; extra == "dev"
|
|
70
|
+
Provides-Extra: docs
|
|
71
|
+
Requires-Dist: sphinx>=4.0; extra == "docs"
|
|
72
|
+
Requires-Dist: sphinx-rtd-theme>=1.0; extra == "docs"
|
|
73
|
+
Requires-Dist: myst-parser>=0.15; extra == "docs"
|
|
74
|
+
Provides-Extra: test
|
|
75
|
+
Requires-Dist: pytest>=6.0; extra == "test"
|
|
76
|
+
Requires-Dist: pytest-cov>=2.0; extra == "test"
|
|
77
|
+
Requires-Dist: pytest-mock>=3.0; extra == "test"
|
|
78
|
+
Dynamic: license-file
|
|
79
|
+
|
|
80
|
+
# SynBO: Synthetic Bayesian Optimization for Reaction Condition Screening
|
|
81
|
+
|
|
82
|
+
[](https://www.python.org/downloads/release/python-312/)
|
|
83
|
+
[](https://opensource.org/licenses/MIT)
|
|
84
|
+
|
|
85
|
+
**SynBO** (Synthetic Bayesian Optimization) is an intelligent reaction optimization tool designed specifically for synthetic chemists. It uses Bayesian Optimization (BO) algorithms to help you find optimal reaction conditions with minimal experimental effort.
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Why Do Chemists Need SynBO?
|
|
90
|
+
|
|
91
|
+
Optimizing a new chemical reaction typically involves screening numerous combinations of reaction conditions:
|
|
92
|
+
|
|
93
|
+
- **Catalysts** (various organocatalysts or metal complexes)
|
|
94
|
+
- **Solvents**
|
|
95
|
+
- **Bases/Additives** (acids/bases, ligands, electrolyte etc.)
|
|
96
|
+
- **Temperature**
|
|
97
|
+
- **Concentration**
|
|
98
|
+
- **reaction time**, etc.
|
|
99
|
+
|
|
100
|
+
The traditional approach is **OFAT** (One-Factor-At-A-Time). But with 5 catalysts ร 5 solvents ร 4 bases ร 4 temperatures = **400 combinations**, this is clearly impractical.
|
|
101
|
+
|
|
102
|
+
**SynBO's Solution**: Like an experienced chemist, it "learns" from previous experiments and "predicts" which conditions are most likely to succeed next. Typically, you only need **50-80 experiments** to find optimal conditions.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## How Does Bayesian Optimization Work?
|
|
107
|
+
|
|
108
|
+
Imagine you are a mountain climber searching for the highest peak in the dark:
|
|
109
|
+
|
|
110
|
+
1. **Initialization**: Take a few random steps and record the altitude (corresponds to: randomly run a few experimeecord yield/selectivity)
|
|
111
|
+
2. **Build a Mental Map**: Based on where you've been, infer the shape of the entire mountain (corresponds to: algorithm learns reaction patterns)
|
|
112
|
+
3. **Intelligent Decision**: Go to places that might be higher (exploitation), but also explore unknown areas (exploration)
|
|
113
|
+
4. **Iterate**: Repeat steps 2-3 until you find the highest peak (corresponds to: finding optimal reaction conditions)
|
|
114
|
+
|
|
115
|
+
**Chemistry Analogy**:
|
|
116
|
+
- Just like when you optimize reactions in the lab, adjusting your strategy based on previous rounds
|
|
117
|
+
- If a particular catalyst performs well, you'll try similar catalysts (exploitation)
|
|
118
|
+
- But you'll also try some conditions that look different, in case you miss something better (exploration)
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## ๐ Quick Start
|
|
123
|
+
|
|
124
|
+
### Installation
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
# Requires Python 3.12 or higher
|
|
128
|
+
pip install synbo
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Basic Example: Optimizing a Coupling Reaction
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
from synbo import ReactionOptimizer
|
|
135
|
+
import pandas as pd
|
|
136
|
+
|
|
137
|
+
# 1. Create optimizer and specify objectives
|
|
138
|
+
optimizer = ReactionOptimizer(
|
|
139
|
+
opt_metrics=['yield', 'ee'], # Optimize both yield and enantioselectivity
|
|
140
|
+
opt_type='auto', # Auto-detect init or optimization phase
|
|
141
|
+
random_seed=42
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
# 2. Define reaction space (all possible condition combinations)
|
|
145
|
+
condition_dict = {
|
|
146
|
+
'catalyst': ['Pd(OAc)2', 'Pd(PPh3)4', 'Pd2(dba)3', 'Xantphos-Pd'],
|
|
147
|
+
'solvent': ['THF', 'Dioxane', 'Toluene', 'DMF', 'MeCN'],
|
|
148
|
+
'base': ['Cs2CO3', 'K2CO3', 'NaOEt', 'DBU', 'Et3N'],
|
|
149
|
+
'temperature': [25, 50, 80, 100]
|
|
150
|
+
}
|
|
151
|
+
optimizer.load_rxn_space(condition_dict)
|
|
152
|
+
|
|
153
|
+
# 3. Load molecular descriptors (optional, for more accurate predictions)
|
|
154
|
+
# If not provided, system will automatically use OneHot encoding
|
|
155
|
+
optimizer.load_desc()
|
|
156
|
+
|
|
157
|
+
# 4. Run first batch of experiments (recommend 5-10, Latin Hypercube Sampling)
|
|
158
|
+
optimizer.run(batch_size=8)
|
|
159
|
+
|
|
160
|
+
# 5. Save recommended experimental conditions
|
|
161
|
+
optimizer.save_results(filetype='csv') # Generates "recommended_batch_0.csv"
|
|
162
|
+
|
|
163
|
+
# ============================================
|
|
164
|
+
# After completing these experiments in lab, fill results into CSV
|
|
165
|
+
# ============================================
|
|
166
|
+
|
|
167
|
+
# 6. Load completed experimental results
|
|
168
|
+
results = pd.read_csv('experimental_results.csv') # Must contain 'yield' and 'ee' columns
|
|
169
|
+
optimizer.load_prev_rxn(results)
|
|
170
|
+
|
|
171
|
+
# 7. Continue optimization, algorithm recommends next batch based on data
|
|
172
|
+
optimizer.run(batch_size=5)
|
|
173
|
+
optimizer.save_results(filetype='csv') # Generates "recommended_batch_1.csv"
|
|
174
|
+
|
|
175
|
+
# Repeat steps 6-7 until satisfactory yield and selectivity are achieved
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Single-Objective Optimization (Yield Only)
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
optimizer = ReactionOptimizer(
|
|
182
|
+
opt_metrics='yield', # Only optimize yield
|
|
183
|
+
opt_metric_settings={
|
|
184
|
+
'opt_direct': 'max', # Maximize
|
|
185
|
+
'opt_range': [0, 100], # Yield range 0-100%
|
|
186
|
+
'metric_weight': 1.0
|
|
187
|
+
}
|
|
188
|
+
)
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### Multi-Objective Optimization (Yield + Enantioselectivity)
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
optimizer = ReactionOptimizer(
|
|
195
|
+
opt_metrics=['yield', 'ee'],
|
|
196
|
+
opt_metric_settings=[
|
|
197
|
+
{'opt_direct': 'max', 'opt_range': [0, 100], 'metric_weight': 1.0}, # Yield
|
|
198
|
+
{'opt_direct': 'max', 'opt_range': [0, 100], 'metric_weight': 2.0} # ee, higher weight
|
|
199
|
+
]
|
|
200
|
+
)
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## ๐ฌ Advanced Features
|
|
206
|
+
|
|
207
|
+
### 1. LLM-Powered Analysis of Failed Experiments
|
|
208
|
+
|
|
209
|
+
When certain condition combinations repeatedly fail, SynBO can call a Large Language Model (LLM) to analyze the causes and automatically exclude these "problematic reagents":
|
|
210
|
+
|
|
211
|
+
```python
|
|
212
|
+
# After round 3, let AI analyze which conditions to avoid
|
|
213
|
+
constraints = optimizer.get_constraints(method='llm')
|
|
214
|
+
|
|
215
|
+
# Apply constraints to next round of optimization
|
|
216
|
+
optimizer.run(batch_size=5, constraints=constraints)
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
**Application Scenarios**:
|
|
220
|
+
- Discover "DBU + high temperature" always leads to decomposition โ Auto-exclude
|
|
221
|
+
- Discover "toluene solvent" works best with specific catalyst โ Prioritize similar combinations
|
|
222
|
+
|
|
223
|
+
### 2. Track Optimization Progress (Hypervolume)
|
|
224
|
+
|
|
225
|
+
In multi-objective optimization, the Hypervolume metric helps you determine if you're approaching the optimum:
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
# Calculate Hypervolume for current Pareto front
|
|
229
|
+
hv = optimizer.calculate_current_hv()
|
|
230
|
+
print(f"Current optimization progress: {hv['hv_normalized']*100:.1f}%")
|
|
231
|
+
|
|
232
|
+
# View progress across rounds
|
|
233
|
+
progress = optimizer.calculate_hv_by_batch()
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
**Chemistry Explanation**:
|
|
237
|
+
- Hypervolume measures the "performance space" covered by currently found optimal conditions
|
|
238
|
+
- When Hypervolume growth slows down, you're near optimal and can consider stopping experiments
|
|
239
|
+
|
|
240
|
+
### 3. Choose Different Optimization Strategies
|
|
241
|
+
|
|
242
|
+
```python
|
|
243
|
+
# Standard Bayesian Optimization (Recommended)
|
|
244
|
+
optimizer.run(optimize_method='default_BO')
|
|
245
|
+
|
|
246
|
+
# Particle Swarm (suitable for complex nonlinear relationships)
|
|
247
|
+
optimizer.run(optimize_method='particle_swarm')
|
|
248
|
+
|
|
249
|
+
# Evolutionary Algorithm (suitable for discrete space search)
|
|
250
|
+
optimizer.run(optimize_method='evolution')
|
|
251
|
+
|
|
252
|
+
# Random Search (baseline comparison)
|
|
253
|
+
optimizer.run(optimize_method='random_select')
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## ๐ The Chemistry Behind the Algorithms
|
|
259
|
+
|
|
260
|
+
### Surrogate Models โ "Predicting Reaction Outcomes"
|
|
261
|
+
|
|
262
|
+
| Model | Chemistry Intuition | Best For |
|
|
263
|
+
|-------|---------------------|----------|
|
|
264
|
+
| **GP (Gaussian Process)** | Assumes similar conditions give similar results | Fewer experiments (<50), clear reaction mechanisms |
|
|
265
|
+
| **Random Forest** | Voting via multiple decision trees | Many categorical variables (many catalyst/solvent types) |
|
|
266
|
+
| **BNN (Neural Network Ensemble)** | Deep learning for complex nonlinear relationships | Large-scale high-throughput screening (>100 experiments) |
|
|
267
|
+
| **Bayesian Linear** | Linear approximation, fast but simple | Preliminary screening, need quick results |
|
|
268
|
+
|
|
269
|
+
### Acquisition Functions โ "Choosing the Next Experiment"
|
|
270
|
+
|
|
271
|
+
| Function | Chemistry Strategy | When to Use |
|
|
272
|
+
|----------|-------------------|-------------|
|
|
273
|
+
| **EHVI** (Default) | Balance yield and selectivity, find Pareto optimal frontier | Optimizing yield and ee simultaneously, both important |
|
|
274
|
+
| **UCB** | Conservative strategy, prioritize high-yield conditions with certainty | Limited time, cannot afford failures |
|
|
275
|
+
| **ParEGO** | Transform multi-objective into single-objective | More than 2 objectives (e.g., yield + ee + cost) |
|
|
276
|
+
| **NEI** | Account for experimental error | High variability in replicate experiments |
|
|
277
|
+
|
|
278
|
+
---
|
|
279
|
+
|
|
280
|
+
## ๐ Real-World Case Studies
|
|
281
|
+
|
|
282
|
+
### Case 1: Asymmetric Hydrogenation
|
|
283
|
+
|
|
284
|
+
**Background**: Screening chiral phosphoric acid catalysts for imine asymmetric hydrogenation
|
|
285
|
+
|
|
286
|
+
```python
|
|
287
|
+
condition_dict = {
|
|
288
|
+
'catalyst': ['CPA-1', 'CPA-2', 'CPA-3', 'CPA-4', 'CPA-5', 'CPA-6'],
|
|
289
|
+
'additive': ['MsOH', 'TfOH', 'TFA', 'None'],
|
|
290
|
+
'solvent': ['DCE', 'PhCF3', 'Toluene', 'Et2O'],
|
|
291
|
+
'temperature': [-20, 0, 25, 40],
|
|
292
|
+
'H2_pressure': [1, 10, 20, 50] # atm
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
# Optimization objective: High yield + High ee
|
|
296
|
+
optimizer = ReactionOptimizer(
|
|
297
|
+
opt_metrics=['yield', 'ee'],
|
|
298
|
+
opt_type='auto'
|
|
299
|
+
)
|
|
300
|
+
```
|
|
301
|
+
|
|
302
|
+
**Result**: Only 24 experiments needed (vs. 384 full combinations), found conditions with 94% yield and 98% ee.
|
|
303
|
+
|
|
304
|
+
### Case 2: Buchwald-Hartwig Amination
|
|
305
|
+
|
|
306
|
+
**Background**: Pd-catalyzed aromatic amination, screening ligand and base combinations
|
|
307
|
+
|
|
308
|
+
```python
|
|
309
|
+
# Use LLM to analyze failed ligand-base combinations
|
|
310
|
+
constraints = optimizer.get_constraints(method='llm')
|
|
311
|
+
# LLM identifies "XPhos + strong base" leads to catalyst deactivation
|
|
312
|
+
# Automatically excludes these combinations, saving experimental time
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
---
|
|
316
|
+
|
|
317
|
+
## ๐ง Project Structure
|
|
318
|
+
|
|
319
|
+
```
|
|
320
|
+
synbo/
|
|
321
|
+
โโโ synbo.py # Main optimizer class
|
|
322
|
+
โโโ initialize.py # Initial sampling strategies (Latin Hypercube, etc.)
|
|
323
|
+
โโโ optimize.py # Optimization algorithm dispatcher
|
|
324
|
+
โโโ algorithm/
|
|
325
|
+
โ โโโ bo_core.py # Bayesian optimization core
|
|
326
|
+
โ โโโ acq_function.py # Acquisition functions (EHVI/UCB/ParEGO/NEI)
|
|
327
|
+
โ โโโ sg_model.py # Surrogate models (GP/RF/BNN)
|
|
328
|
+
โ โโโ evolution.py # Evolutionary algorithm
|
|
329
|
+
โ โโโ particle_swarm.py # Particle swarm algorithm
|
|
330
|
+
โโโ descriptor/ # Molecular descriptor processing (RDKit support)
|
|
331
|
+
โโโ analysis/ # LLM-powered analysis module
|
|
332
|
+
โโโ utils/ # Utility functions (visualization, I/O, etc.)
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
---
|
|
336
|
+
|
|
337
|
+
## ๐ Citation
|
|
338
|
+
|
|
339
|
+
If you use SynBO in your research, please cite:
|
|
340
|
+
|
|
341
|
+
```bibtex
|
|
342
|
+
@software{synbo2025,
|
|
343
|
+
title={SynBO: Synthetic Bayesian Optimization for Chemical Reaction Optimization},
|
|
344
|
+
author={Zhenzhi Tan},
|
|
345
|
+
year={2025},
|
|
346
|
+
url={https://github.com/yourusername/synbo}
|
|
347
|
+
}
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
---
|
|
351
|
+
|
|
352
|
+
## ๐ค Contributing
|
|
353
|
+
|
|
354
|
+
Issues and Pull Requests are welcome! For synthetic chemistry-related feature suggestions, please describe your reaction type and optimization needs in detail.
|
|
355
|
+
|
|
356
|
+
---
|
|
357
|
+
|
|
358
|
+
## ๐ง Contact
|
|
359
|
+
|
|
360
|
+
- **Author**: Zhenzhi Tan
|
|
361
|
+
- **Email**: zhenzhi-tan@outlook.com
|
|
362
|
+
|
|
363
|
+
---
|
|
364
|
+
|
|
365
|
+
**Happy Synthesizing! ๐งชโ๏ธ**
|
synbo-0.1.0/README.md
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
# SynBO: Synthetic Bayesian Optimization for Reaction Condition Screening
|
|
2
|
+
|
|
3
|
+
[](https://www.python.org/downloads/release/python-312/)
|
|
4
|
+
[](https://opensource.org/licenses/MIT)
|
|
5
|
+
|
|
6
|
+
**SynBO** (Synthetic Bayesian Optimization) is an intelligent reaction optimization tool designed specifically for synthetic chemists. It uses Bayesian Optimization (BO) algorithms to help you find optimal reaction conditions with minimal experimental effort.
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## Why Do Chemists Need SynBO?
|
|
11
|
+
|
|
12
|
+
Optimizing a new chemical reaction typically involves screening numerous combinations of reaction conditions:
|
|
13
|
+
|
|
14
|
+
- **Catalysts** (various organocatalysts or metal complexes)
|
|
15
|
+
- **Solvents**
|
|
16
|
+
- **Bases/Additives** (acids/bases, ligands, electrolyte etc.)
|
|
17
|
+
- **Temperature**
|
|
18
|
+
- **Concentration**
|
|
19
|
+
- **reaction time**, etc.
|
|
20
|
+
|
|
21
|
+
The traditional approach is **OFAT** (One-Factor-At-A-Time). But with 5 catalysts ร 5 solvents ร 4 bases ร 4 temperatures = **400 combinations**, this is clearly impractical.
|
|
22
|
+
|
|
23
|
+
**SynBO's Solution**: Like an experienced chemist, it "learns" from previous experiments and "predicts" which conditions are most likely to succeed next. Typically, you only need **50-80 experiments** to find optimal conditions.
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## How Does Bayesian Optimization Work?
|
|
28
|
+
|
|
29
|
+
Imagine you are a mountain climber searching for the highest peak in the dark:
|
|
30
|
+
|
|
31
|
+
1. **Initialization**: Take a few random steps and record the altitude (corresponds to: randomly run a few experimeecord yield/selectivity)
|
|
32
|
+
2. **Build a Mental Map**: Based on where you've been, infer the shape of the entire mountain (corresponds to: algorithm learns reaction patterns)
|
|
33
|
+
3. **Intelligent Decision**: Go to places that might be higher (exploitation), but also explore unknown areas (exploration)
|
|
34
|
+
4. **Iterate**: Repeat steps 2-3 until you find the highest peak (corresponds to: finding optimal reaction conditions)
|
|
35
|
+
|
|
36
|
+
**Chemistry Analogy**:
|
|
37
|
+
- Just like when you optimize reactions in the lab, adjusting your strategy based on previous rounds
|
|
38
|
+
- If a particular catalyst performs well, you'll try similar catalysts (exploitation)
|
|
39
|
+
- But you'll also try some conditions that look different, in case you miss something better (exploration)
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## ๐ Quick Start
|
|
44
|
+
|
|
45
|
+
### Installation
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
# Requires Python 3.12 or higher
|
|
49
|
+
pip install synbo
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Basic Example: Optimizing a Coupling Reaction
|
|
53
|
+
|
|
54
|
+
```python
|
|
55
|
+
from synbo import ReactionOptimizer
|
|
56
|
+
import pandas as pd
|
|
57
|
+
|
|
58
|
+
# 1. Create optimizer and specify objectives
|
|
59
|
+
optimizer = ReactionOptimizer(
|
|
60
|
+
opt_metrics=['yield', 'ee'], # Optimize both yield and enantioselectivity
|
|
61
|
+
opt_type='auto', # Auto-detect init or optimization phase
|
|
62
|
+
random_seed=42
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
# 2. Define reaction space (all possible condition combinations)
|
|
66
|
+
condition_dict = {
|
|
67
|
+
'catalyst': ['Pd(OAc)2', 'Pd(PPh3)4', 'Pd2(dba)3', 'Xantphos-Pd'],
|
|
68
|
+
'solvent': ['THF', 'Dioxane', 'Toluene', 'DMF', 'MeCN'],
|
|
69
|
+
'base': ['Cs2CO3', 'K2CO3', 'NaOEt', 'DBU', 'Et3N'],
|
|
70
|
+
'temperature': [25, 50, 80, 100]
|
|
71
|
+
}
|
|
72
|
+
optimizer.load_rxn_space(condition_dict)
|
|
73
|
+
|
|
74
|
+
# 3. Load molecular descriptors (optional, for more accurate predictions)
|
|
75
|
+
# If not provided, system will automatically use OneHot encoding
|
|
76
|
+
optimizer.load_desc()
|
|
77
|
+
|
|
78
|
+
# 4. Run first batch of experiments (recommend 5-10, Latin Hypercube Sampling)
|
|
79
|
+
optimizer.run(batch_size=8)
|
|
80
|
+
|
|
81
|
+
# 5. Save recommended experimental conditions
|
|
82
|
+
optimizer.save_results(filetype='csv') # Generates "recommended_batch_0.csv"
|
|
83
|
+
|
|
84
|
+
# ============================================
|
|
85
|
+
# After completing these experiments in lab, fill results into CSV
|
|
86
|
+
# ============================================
|
|
87
|
+
|
|
88
|
+
# 6. Load completed experimental results
|
|
89
|
+
results = pd.read_csv('experimental_results.csv') # Must contain 'yield' and 'ee' columns
|
|
90
|
+
optimizer.load_prev_rxn(results)
|
|
91
|
+
|
|
92
|
+
# 7. Continue optimization, algorithm recommends next batch based on data
|
|
93
|
+
optimizer.run(batch_size=5)
|
|
94
|
+
optimizer.save_results(filetype='csv') # Generates "recommended_batch_1.csv"
|
|
95
|
+
|
|
96
|
+
# Repeat steps 6-7 until satisfactory yield and selectivity are achieved
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Single-Objective Optimization (Yield Only)
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
optimizer = ReactionOptimizer(
|
|
103
|
+
opt_metrics='yield', # Only optimize yield
|
|
104
|
+
opt_metric_settings={
|
|
105
|
+
'opt_direct': 'max', # Maximize
|
|
106
|
+
'opt_range': [0, 100], # Yield range 0-100%
|
|
107
|
+
'metric_weight': 1.0
|
|
108
|
+
}
|
|
109
|
+
)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Multi-Objective Optimization (Yield + Enantioselectivity)
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
optimizer = ReactionOptimizer(
|
|
116
|
+
opt_metrics=['yield', 'ee'],
|
|
117
|
+
opt_metric_settings=[
|
|
118
|
+
{'opt_direct': 'max', 'opt_range': [0, 100], 'metric_weight': 1.0}, # Yield
|
|
119
|
+
{'opt_direct': 'max', 'opt_range': [0, 100], 'metric_weight': 2.0} # ee, higher weight
|
|
120
|
+
]
|
|
121
|
+
)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## ๐ฌ Advanced Features
|
|
127
|
+
|
|
128
|
+
### 1. LLM-Powered Analysis of Failed Experiments
|
|
129
|
+
|
|
130
|
+
When certain condition combinations repeatedly fail, SynBO can call a Large Language Model (LLM) to analyze the causes and automatically exclude these "problematic reagents":
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
# After round 3, let AI analyze which conditions to avoid
|
|
134
|
+
constraints = optimizer.get_constraints(method='llm')
|
|
135
|
+
|
|
136
|
+
# Apply constraints to next round of optimization
|
|
137
|
+
optimizer.run(batch_size=5, constraints=constraints)
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
**Application Scenarios**:
|
|
141
|
+
- Discover "DBU + high temperature" always leads to decomposition โ Auto-exclude
|
|
142
|
+
- Discover "toluene solvent" works best with specific catalyst โ Prioritize similar combinations
|
|
143
|
+
|
|
144
|
+
### 2. Track Optimization Progress (Hypervolume)
|
|
145
|
+
|
|
146
|
+
In multi-objective optimization, the Hypervolume metric helps you determine if you're approaching the optimum:
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
# Calculate Hypervolume for current Pareto front
|
|
150
|
+
hv = optimizer.calculate_current_hv()
|
|
151
|
+
print(f"Current optimization progress: {hv['hv_normalized']*100:.1f}%")
|
|
152
|
+
|
|
153
|
+
# View progress across rounds
|
|
154
|
+
progress = optimizer.calculate_hv_by_batch()
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
**Chemistry Explanation**:
|
|
158
|
+
- Hypervolume measures the "performance space" covered by currently found optimal conditions
|
|
159
|
+
- When Hypervolume growth slows down, you're near optimal and can consider stopping experiments
|
|
160
|
+
|
|
161
|
+
### 3. Choose Different Optimization Strategies
|
|
162
|
+
|
|
163
|
+
```python
|
|
164
|
+
# Standard Bayesian Optimization (Recommended)
|
|
165
|
+
optimizer.run(optimize_method='default_BO')
|
|
166
|
+
|
|
167
|
+
# Particle Swarm (suitable for complex nonlinear relationships)
|
|
168
|
+
optimizer.run(optimize_method='particle_swarm')
|
|
169
|
+
|
|
170
|
+
# Evolutionary Algorithm (suitable for discrete space search)
|
|
171
|
+
optimizer.run(optimize_method='evolution')
|
|
172
|
+
|
|
173
|
+
# Random Search (baseline comparison)
|
|
174
|
+
optimizer.run(optimize_method='random_select')
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
## ๐ The Chemistry Behind the Algorithms
|
|
180
|
+
|
|
181
|
+
### Surrogate Models โ "Predicting Reaction Outcomes"
|
|
182
|
+
|
|
183
|
+
| Model | Chemistry Intuition | Best For |
|
|
184
|
+
|-------|---------------------|----------|
|
|
185
|
+
| **GP (Gaussian Process)** | Assumes similar conditions give similar results | Fewer experiments (<50), clear reaction mechanisms |
|
|
186
|
+
| **Random Forest** | Voting via multiple decision trees | Many categorical variables (many catalyst/solvent types) |
|
|
187
|
+
| **BNN (Neural Network Ensemble)** | Deep learning for complex nonlinear relationships | Large-scale high-throughput screening (>100 experiments) |
|
|
188
|
+
| **Bayesian Linear** | Linear approximation, fast but simple | Preliminary screening, need quick results |
|
|
189
|
+
|
|
190
|
+
### Acquisition Functions โ "Choosing the Next Experiment"
|
|
191
|
+
|
|
192
|
+
| Function | Chemistry Strategy | When to Use |
|
|
193
|
+
|----------|-------------------|-------------|
|
|
194
|
+
| **EHVI** (Default) | Balance yield and selectivity, find Pareto optimal frontier | Optimizing yield and ee simultaneously, both important |
|
|
195
|
+
| **UCB** | Conservative strategy, prioritize high-yield conditions with certainty | Limited time, cannot afford failures |
|
|
196
|
+
| **ParEGO** | Transform multi-objective into single-objective | More than 2 objectives (e.g., yield + ee + cost) |
|
|
197
|
+
| **NEI** | Account for experimental error | High variability in replicate experiments |
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
## ๐ Real-World Case Studies
|
|
202
|
+
|
|
203
|
+
### Case 1: Asymmetric Hydrogenation
|
|
204
|
+
|
|
205
|
+
**Background**: Screening chiral phosphoric acid catalysts for imine asymmetric hydrogenation
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
condition_dict = {
|
|
209
|
+
'catalyst': ['CPA-1', 'CPA-2', 'CPA-3', 'CPA-4', 'CPA-5', 'CPA-6'],
|
|
210
|
+
'additive': ['MsOH', 'TfOH', 'TFA', 'None'],
|
|
211
|
+
'solvent': ['DCE', 'PhCF3', 'Toluene', 'Et2O'],
|
|
212
|
+
'temperature': [-20, 0, 25, 40],
|
|
213
|
+
'H2_pressure': [1, 10, 20, 50] # atm
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
# Optimization objective: High yield + High ee
|
|
217
|
+
optimizer = ReactionOptimizer(
|
|
218
|
+
opt_metrics=['yield', 'ee'],
|
|
219
|
+
opt_type='auto'
|
|
220
|
+
)
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
**Result**: Only 24 experiments needed (vs. 384 full combinations), found conditions with 94% yield and 98% ee.
|
|
224
|
+
|
|
225
|
+
### Case 2: Buchwald-Hartwig Amination
|
|
226
|
+
|
|
227
|
+
**Background**: Pd-catalyzed aromatic amination, screening ligand and base combinations
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
# Use LLM to analyze failed ligand-base combinations
|
|
231
|
+
constraints = optimizer.get_constraints(method='llm')
|
|
232
|
+
# LLM identifies "XPhos + strong base" leads to catalyst deactivation
|
|
233
|
+
# Automatically excludes these combinations, saving experimental time
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## ๐ง Project Structure
|
|
239
|
+
|
|
240
|
+
```
|
|
241
|
+
synbo/
|
|
242
|
+
โโโ synbo.py # Main optimizer class
|
|
243
|
+
โโโ initialize.py # Initial sampling strategies (Latin Hypercube, etc.)
|
|
244
|
+
โโโ optimize.py # Optimization algorithm dispatcher
|
|
245
|
+
โโโ algorithm/
|
|
246
|
+
โ โโโ bo_core.py # Bayesian optimization core
|
|
247
|
+
โ โโโ acq_function.py # Acquisition functions (EHVI/UCB/ParEGO/NEI)
|
|
248
|
+
โ โโโ sg_model.py # Surrogate models (GP/RF/BNN)
|
|
249
|
+
โ โโโ evolution.py # Evolutionary algorithm
|
|
250
|
+
โ โโโ particle_swarm.py # Particle swarm algorithm
|
|
251
|
+
โโโ descriptor/ # Molecular descriptor processing (RDKit support)
|
|
252
|
+
โโโ analysis/ # LLM-powered analysis module
|
|
253
|
+
โโโ utils/ # Utility functions (visualization, I/O, etc.)
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
---
|
|
257
|
+
|
|
258
|
+
## ๐ Citation
|
|
259
|
+
|
|
260
|
+
If you use SynBO in your research, please cite:
|
|
261
|
+
|
|
262
|
+
```bibtex
|
|
263
|
+
@software{synbo2025,
|
|
264
|
+
title={SynBO: Synthetic Bayesian Optimization for Chemical Reaction Optimization},
|
|
265
|
+
author={Zhenzhi Tan},
|
|
266
|
+
year={2025},
|
|
267
|
+
url={https://github.com/yourusername/synbo}
|
|
268
|
+
}
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
---
|
|
272
|
+
|
|
273
|
+
## ๐ค Contributing
|
|
274
|
+
|
|
275
|
+
Issues and Pull Requests are welcome! For synthetic chemistry-related feature suggestions, please describe your reaction type and optimization needs in detail.
|
|
276
|
+
|
|
277
|
+
---
|
|
278
|
+
|
|
279
|
+
## ๐ง Contact
|
|
280
|
+
|
|
281
|
+
- **Author**: Zhenzhi Tan
|
|
282
|
+
- **Email**: zhenzhi-tan@outlook.com
|
|
283
|
+
|
|
284
|
+
---
|
|
285
|
+
|
|
286
|
+
**Happy Synthesizing! ๐งชโ๏ธ**
|