synkit 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synkit/Chem/Fingerprint/__init__.py +0 -0
- synkit/Chem/Fingerprint/fp_calculator.py +122 -0
- synkit/Chem/Fingerprint/smiles_featurizer.py +185 -0
- synkit/Chem/Fingerprint/transformation_fp.py +79 -0
- synkit/Chem/Molecule/__init__.py +0 -0
- synkit/Chem/Molecule/standardize.py +137 -0
- synkit/Chem/Reaction/__init__.py +0 -0
- synkit/Chem/Reaction/balance_check.py +162 -0
- synkit/Chem/Reaction/cleanning.py +59 -0
- synkit/Chem/Reaction/deionize.py +289 -0
- synkit/Chem/Reaction/neutralize.py +256 -0
- synkit/Chem/Reaction/reagent.py +102 -0
- synkit/Chem/Reaction/standardize.py +157 -0
- synkit/Chem/Reaction/tautomerize.py +168 -0
- synkit/Graph/Cluster/__init__.py +0 -0
- synkit/Graph/Cluster/morphism.py +83 -0
- synkit/Graph/Feature/__init__.py +0 -0
- synkit/Graph/Feature/graph_descriptors.py +325 -0
- synkit/Graph/Feature/graph_fps.py +97 -0
- synkit/Graph/Feature/graph_signature.py +236 -0
- synkit/Graph/Feature/hash_fps.py +130 -0
- synkit/Graph/Feature/morgan_fps.py +87 -0
- synkit/Graph/Feature/path_fps.py +82 -0
- synkit/Graph/__init.py +0 -0
- synkit/IO/__init__.py +0 -0
- synkit/IO/chem_converter.py +231 -0
- synkit/IO/data_io.py +277 -0
- synkit/IO/data_process.py +49 -0
- synkit/IO/debug.py +78 -0
- synkit/IO/dg_to_gml.py +124 -0
- synkit/IO/gml_to_nx.py +119 -0
- synkit/IO/graph_to_mol.py +110 -0
- synkit/IO/mol_to_graph.py +282 -0
- synkit/IO/nx_to_gml.py +200 -0
- synkit/IO/parse_rule.py +172 -0
- synkit/IO/smiles_to_id.py +119 -0
- synkit/ITS/_misc.py +280 -0
- synkit/ITS/aam_validator.py +254 -0
- synkit/ITS/its_builder.py +94 -0
- synkit/ITS/its_construction.py +213 -0
- synkit/ITS/normalize_aam.py +183 -0
- synkit/ITS/partial_expand.py +170 -0
- synkit/Reactor/__init__.py +0 -0
- synkit/Reactor/core_engine.py +164 -0
- synkit/Reactor/inference.py +73 -0
- synkit/Reactor/multi_step.py +227 -0
- synkit/Reactor/multi_step_aam.py +82 -0
- synkit/Reactor/reagent.py +95 -0
- synkit/Reactor/rule_apply.py +81 -0
- synkit/Vis/__init__.py +0 -0
- synkit/Vis/chemical_graph_visualizer.py +378 -0
- synkit/Vis/chemical_reaction_visualizer.py +133 -0
- synkit/Vis/chemical_space.py +83 -0
- synkit/Vis/embedding.py +92 -0
- synkit/Vis/graph_visualizer.py +286 -0
- synkit/Vis/pdf_writer.py +143 -0
- synkit/Vis/rsmi_to_fig.py +169 -0
- synkit/__init__.py +0 -0
- synkit/_misc.py +181 -0
- synkit-0.0.1.dist-info/METADATA +148 -0
- synkit-0.0.1.dist-info/RECORD +63 -0
- synkit-0.0.1.dist-info/WHEEL +4 -0
- synkit-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
from synkit.Chem.Reaction.standardize import Standardize
|
|
3
|
+
from synkit.Chem.Reaction.balance_check import BalanceReactionCheck
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Cleanning:
    """
    Stateless helpers for tidying lists of reaction SMILES strings:
    standardization, balance filtering and order-preserving de-duplication.
    """

    def __init__(self) -> None:
        """Nothing to initialise; all functionality lives in static methods."""

    @staticmethod
    def remove_duplicates(smiles_list: List[str]) -> List[str]:
        """
        Drops repeated SMILES strings while keeping the position of each
        first occurrence.

        Parameters:
        - smiles_list (List[str]): SMILES strings, possibly with repeats.

        Returns:
        - List[str]: A new list in which every string appears once, in the
        order it was first seen.
        """
        # Dictionary keys are unique and remember insertion order, so building
        # a dict from the items and listing its keys de-duplicates in order.
        return list(dict.fromkeys(smiles_list))

    @staticmethod
    def clean_smiles(smiles_list: List[str]) -> List[str]:
        """
        Standardizes reaction SMILES, keeps only those reported as balanced,
        and removes duplicates.

        Falsy entries (e.g. empty strings or None) are skipped. Every remaining
        entry is passed through `Standardize.standardize_rsmi(entry, True)`;
        the standardized results are then filtered with
        `BalanceReactionCheck.rsmi_balance_check`, and finally de-duplicated
        with `remove_duplicates`.

        Parameters:
        - smiles_list (List[str]): Reaction SMILES strings to clean.

        Returns:
        - List[str]: Standardized, balanced and unique reaction SMILES, in
        order of first appearance.
        """
        rsmi_standardizer = Standardize()
        rsmi_balance = BalanceReactionCheck()

        # First pass: standardize every non-empty entry.
        standardized = []
        for raw_rsmi in smiles_list:
            if not raw_rsmi:
                continue
            standardized.append(rsmi_standardizer.standardize_rsmi(raw_rsmi, True))

        # Second pass: keep only reactions the checker reports as balanced.
        balanced = []
        for std_rsmi in standardized:
            if rsmi_balance.rsmi_balance_check(std_rsmi):
                balanced.append(std_rsmi)

        return Cleanning.remove_duplicates(balanced)
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
import random
|
|
2
|
+
from itertools import permutations
|
|
3
|
+
from itertools import combinations
|
|
4
|
+
from joblib import Parallel, delayed
|
|
5
|
+
from typing import List, Tuple, Callable, Dict
|
|
6
|
+
|
|
7
|
+
from rdkit import Chem
|
|
8
|
+
from rdkit.Chem.MolStandardize import rdMolStandardize
|
|
9
|
+
|
|
10
|
+
from synkit.Chem.Reaction.balance_check import BalanceReactionCheck
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Deionize:
    """
    A class to deionize reactions.

    Every helper is a static or class method. Charged components of a
    dot-separated SMILES string are grouped into charge-neutral sets, stripped
    of their charge and merged back into neutral species where RDKit accepts
    the merged string.
    """

    @staticmethod
    def random_pair_ions(
        charges: List[int], smiles: List[str]
    ) -> Tuple[List[List[str]], List[List[int]]]:
        """
        Generates non-overlapping groups of ions (2, 3, or 4) whose charges
        sum to zero, preferring multiple smaller groups over fewer larger
        groups (all possible pairs are formed first, then triples, then
        quadruples).

        The candidate order is shuffled with `random.shuffle`, so when several
        groupings are possible the result can differ between calls.

        Parameters:
        - charges (List[int]): A list of integer charges of the ions.
        - smiles (List[str]): A list of SMILES strings representing the ions,
        aligned index-by-index with `charges`.

        Returns:
        - Tuple[List[List[str]], List[List[int]]]: A tuple containing two lists:
            - The first list contains the groups of SMILES strings.
            - The second list contains the groups of charges.
        Ions that fit into no zero-sum group appear in neither list.
        """

        def find_groups(indices, size):
            """Returns the first combination of `size` indices whose charges
            sum to zero, or an empty list when there is none."""
            for group in combinations(indices, size):
                if sum(charges[i] for i in group) == 0:
                    return group
            return []

        # Prepare initial variables
        indices = list(range(len(charges)))
        random.shuffle(indices)  # Shuffle indices to ensure variety
        used_indices = set()
        grouped_smiles = []
        grouped_charges = []

        # Start with pairs, then triples, and finally quads
        for group_size in range(2, 5):
            while True:
                group = find_groups(
                    [i for i in indices if i not in used_indices], group_size
                )
                if not group:
                    break  # No more groups of this size can be formed
                grouped_smiles.append([smiles[i] for i in group])
                grouped_charges.append([charges[i] for i in group])
                used_indices.update(group)

        return grouped_smiles, grouped_charges

    @staticmethod
    def uncharge_anion(smiles: str, charges: int = -1) -> str:
        """
        Removes charge from an anionic species represented by a SMILES string.

        - The azide string "[N-]=[N+]=[N-]" is special-cased and mapped to
        "[N-]=[N+]=[N]".
        - A charge of exactly -1 is neutralized with RDKit's
        `rdMolStandardize.Uncharger`.
        - A charge below -1 is handled textually: the charge token (e.g. "-2")
        and all square brackets are removed from the string.
        - Any other charge value leaves the string unchanged.

        Parameters:
        - smiles (str): A SMILES string representing the anionic species.
        - charges (int): The formal charge of the species. Defaults to -1.

        Returns:
        - str: The SMILES string of the uncharged molecule.

        Note:
        - The function assumes valid SMILES input; an unparsable string in the
        -1 branch is not handled here.
        """
        if smiles == "[N-]=[N+]=[N-]":
            return "[N-]=[N+]=[N]"

        if charges == -1:
            # Convert the SMILES string to an RDKit molecule object
            mol = Chem.MolFromSmiles(smiles)

            # Apply RDKit's uncharger and convert back to SMILES
            uncharger = rdMolStandardize.Uncharger()
            uncharged_mol = uncharger.uncharge(mol)
            return Chem.MolToSmiles(uncharged_mol)

        if charges < -1:
            # For multiple negative charges, directly modify the SMILES string
            return smiles.replace(f"{charges}", "").replace("[", "").replace("]", "")

        # Not an anion: return the input instead of an implicit None so the
        # declared `str` return type always holds.
        return smiles

    @staticmethod
    def uncharge_cation(smiles: str, charges: int = 1) -> str:
        """
        Removes charge from a cationic species represented by a SMILES string.

        The change is purely textual: for a charge of 1 every "+" character is
        removed; for a larger charge the token "+<charges>" (e.g. "+2") is
        removed. Any other charge value leaves the string unchanged.

        Parameters:
        - smiles (str): A SMILES string representing the cationic species.
        - charges (int): The formal charge of the species. Defaults to 1.

        Returns:
        - str: The SMILES string with the charge marker removed.

        Note:
        - The function assumes valid SMILES input.
        """
        if charges == 1:
            return smiles.replace("+", "")
        if charges > 1:
            # For multiple positive charges, directly modify the SMILES string
            return smiles.replace(f"+{charges}", "")
        # Not a cation: nothing to strip (previously this path raised
        # UnboundLocalError because no result had been assigned).
        return smiles

    @staticmethod
    def uncharge_smiles(charge_smiles: str) -> str:
        """
        Processes a SMILES string containing ionic and non-ionic parts,
        neutralizes the charges, and returns a modified SMILES string.

        The input is split on "." and the formal charge of every component is
        computed with RDKit. If all components are neutral the input is
        returned unchanged. Otherwise the charged components are grouped into
        zero-sum sets by `random_pair_ions`, each ion of a group is uncharged,
        and the uncharged fragments are concatenated in every permutation until
        one yields a string that RDKit can parse and whose canonical SMILES
        contains no dative-bond marker ("->" or "<-"). When no permutation
        qualifies, the original ions of that group are kept as they were.

        Parameters:
        - charge_smiles (str): A SMILES string that may contain ionic and
        non-ionic parts.

        Returns:
        - str: A dot-separated SMILES string: neutral components first,
        followed by one entry per processed group.

        Note:
        - This function depends on RDKit for molecular operations.
        - The function assumes a valid SMILES input.
        - Grouping is randomized (see `random_pair_ions`), so the output may
        vary between calls when several groupings are possible.
        - NOTE(review): charged components that end up in no zero-sum group are
        not part of the returned string - confirm this is intended.
        """
        smiles = charge_smiles.split(".")
        charges = [Chem.rdmolops.GetFormalCharge(Chem.MolFromSmiles(i)) for i in smiles]

        if all(charge == 0 for charge in charges):
            return charge_smiles

        valid_smiles, non_ionic_smiles = [], []
        original_ionic_parts, original_ion_charges = [], []

        # Splitting the SMILES into ionic and non-ionic parts
        for smile, charge in zip(smiles, charges):
            if charge == 0:
                non_ionic_smiles.append(smile)
            else:
                original_ionic_parts.append(smile)
                original_ion_charges.append(charge)

        valid_smiles.extend(non_ionic_smiles)
        paired_smiles, paired_charges = Deionize.random_pair_ions(
            original_ion_charges, original_ionic_parts
        )

        # Markers of dative (coordinate) bonds in canonical SMILES
        coordinate_pattern = ("->", "<-")

        # Processing each group of ionic parts
        for i_smile, i_charge in zip(paired_smiles, paired_charges):
            modified_ions = []
            for ion, charge in zip(i_smile, i_charge):
                if int(charge) > 0:
                    modified_ions.append(Deionize.uncharge_cation(ion, charge))
                elif int(charge) < 0:
                    modified_ions.append(Deionize.uncharge_anion(ion, charge))

            # Try every ordering of the uncharged fragments
            check_merge = False
            for perm in permutations(modified_ions):
                combined_ionic = "".join(perm)
                if not Chem.MolFromSmiles(combined_ionic):
                    continue
                # Canonicalize once and reuse for both the check and the output
                canonical = Chem.CanonSmiles(combined_ionic)
                if all(pattern not in canonical for pattern in coordinate_pattern):
                    valid_smiles.append(canonical)
                    check_merge = True
                    break

            if check_merge is False:
                # No acceptable merge: keep the group's original charged ions
                valid_smiles.extend(i_smile)

        return ".".join(valid_smiles)

    @staticmethod
    def ammonia_hydroxide_standardize(reaction_smiles: str) -> str:
        """
        Replaces occurrences of ammonium hydroxide (NH4+ and OH-) in a
        reaction SMILES string with a simplified representation (N.O or O.N).

        Only the exact adjacent substrings "[NH4+].[OH-]" and "[OH-].[NH4+]"
        are replaced.

        Parameters:
        - reaction_smiles (str): The reaction SMILES string to be standardized.

        Returns:
        - str: The standardized reaction SMILES string with ammonium hydroxide
        represented as 'N.O' or 'O.N'.
        """
        # Simplify the representation of ammonium hydroxide in the reaction SMILES
        return reaction_smiles.replace("[NH4+].[OH-]", "N.O").replace(
            "[OH-].[NH4+]", "O.N"
        )

    @classmethod
    def apply_uncharge_smiles_to_reactions(
        cls,
        reactions: List[Dict[str, str]],
        uncharge_smiles_func: Callable[[str], str],
        n_jobs: int = 4,
    ) -> List[Dict[str, str]]:
        """
        Applies a given uncharge SMILES function to the reactants and products
        of a list of chemical reactions, distributing the work with joblib.

        Each reaction is expected to be a dictionary with at least 'reactants'
        and 'products' keys. Both sides are first passed through
        `ammonia_hydroxide_standardize` and then through
        `uncharge_smiles_func`. The combined molecular formulas of the
        uncharged sides are compared and the following keys are written:

        - 'success': True when the formulas of both uncharged sides match,
        False otherwise.
        - 'new_reactants' / 'new_products': the uncharged strings on success;
        otherwise the ammonium-hydroxide-standardized (still charged) strings.
        - 'standardized_reactions': "<new_reactants>>><new_products>".

        Parameters:
        - reactions (List[Dict[str, str]]): A list of dictionaries, where each
        dictionary represents a chemical reaction with 'reactants' and
        'products' keys.
        - uncharge_smiles_func (Callable[[str], str]): A function that takes a
        SMILES string as input and returns a modified SMILES string with
        neutralized charges.
        - n_jobs (int): Number of joblib workers. Defaults to 4.

        Returns:
        - List[Dict[str, str]]: One dictionary per input reaction, in input
        order, carrying the keys described above. Use the returned list rather
        than relying on the input dictionaries being updated, since joblib
        workers may operate on copies.
        """

        # Define a helper function for processing a single reaction
        def process_reaction(reaction):
            fix_reactants = cls.ammonia_hydroxide_standardize(reaction["reactants"])
            fix_products = cls.ammonia_hydroxide_standardize(reaction["products"])

            uncharged_reactants = uncharge_smiles_func(fix_reactants)
            uncharged_products = uncharge_smiles_func(fix_products)
            uncharged_reactants_formula = (
                BalanceReactionCheck().get_combined_molecular_formula(
                    uncharged_reactants
                )
            )
            uncharged_products_formula = (
                BalanceReactionCheck().get_combined_molecular_formula(
                    uncharged_products
                )
            )

            # Uncharging must not change the overall composition; if the two
            # sides no longer agree, fall back to the pre-uncharge strings.
            if uncharged_reactants_formula != uncharged_products_formula:
                reaction["success"] = False
                reaction["new_reactants"] = fix_reactants
                reaction["new_products"] = fix_products
            else:
                reaction["success"] = True
                reaction["new_reactants"] = uncharged_reactants
                reaction["new_products"] = uncharged_products
            reaction["standardized_reactions"] = (
                f"{reaction['new_reactants']}>>{reaction['new_products']}"
            )
            return reaction

        # Use joblib to parallelize the processing of reactions
        reactions = Parallel(n_jobs=n_jobs)(
            delayed(process_reaction)(reaction) for reaction in reactions
        )
        return reactions
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
from rdkit import Chem
|
|
2
|
+
from joblib import Parallel, delayed
|
|
3
|
+
from typing import Dict, Any, List, Union, Tuple
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Neutralize:
    """
    A class for neutralizing unbalanced charges in a reaction.

    The net formal charge of the products is computed and compensated by
    appending counter-ions ([Na+] or [Cl-]) to both sides of the reaction.
    """

    @staticmethod
    def calculate_charge(smiles: str) -> int:
        """
        Calculates the formal charge of a given molecule represented by a
        SMILES string.

        Parameters:
        - smiles (str): A SMILES string representing a molecule.

        Returns:
        - int: The formal charge of the molecule, or 0 when RDKit cannot parse
        the SMILES string.
        """
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return 0
        return Chem.rdmolops.GetFormalCharge(mol)

    @staticmethod
    def parse_reaction(
        reaction_smiles: str,
    ) -> Union[Tuple[str, str], Tuple[None, None]]:
        """
        Parses a reaction SMILES string into reactants and products.

        Parameters:
        - reaction_smiles (str): A reaction SMILES string of the form
        "reactants>>products".

        Returns:
        - Tuple[str, str]: The reactants and products SMILES strings,
        respectively. When splitting on ">>" does not yield exactly two parts,
        (None, None) is returned instead.
        """
        try:
            reactants, products = reaction_smiles.split(">>")
        except ValueError:
            # Zero or more than one ">>" separator: not a two-sided reaction
            return None, None
        return reactants, products

    @staticmethod
    def calculate_charge_dict(
        reaction: Dict[str, str], reaction_column: str
    ) -> Dict[str, Union[str, int]]:
        """
        Calculates and adds the total charge of products in a single reaction.

        Parameters:
        - reaction (Dict[str, str]): A dictionary representing a reaction. It
        must hold the reaction SMILES under `reaction_column`.
        - reaction_column (str): The key under which the reaction SMILES is
        stored.

        Returns:
        - Dict[str, Union[str, int]]: The same reaction dictionary (updated in
        place) with the added keys 'reactants', 'products' and
        'total_charge_in_products'. All three are None when the reaction
        string cannot be split into reactants and products.
        """
        reactants, products = Neutralize.parse_reaction(reaction[reaction_column])
        if reactants is None or products is None:
            reaction.update(
                {"reactants": None, "products": None, "total_charge_in_products": None}
            )
        else:
            reaction["reactants"] = reactants
            reaction["products"] = products
            total_charge = sum(
                Neutralize.calculate_charge(product) for product in products.split(".")
            )
            reaction["total_charge_in_products"] = total_charge
        return reaction

    @staticmethod
    def _add_counter_ions(
        reaction_dict: Dict[str, Any],
        ion: str,
        charges_column: str,
        id_column: str,
        reaction_column: str,
    ) -> Dict[str, Any]:
        """Builds a new reaction dictionary with |charge| copies of `ion`
        appended to both the reactants and the products."""
        num_ions = abs(reaction_dict[charges_column])

        # Generate the string to add, with the correct number of ions
        addition = "." + ".".join([ion] * num_ions) if num_ions > 0 else ""

        new_reactants = reaction_dict["reactants"] + addition
        new_products = reaction_dict["products"] + addition

        return {
            # Read and write the identifier under the same, caller-chosen key
            id_column: reaction_dict[id_column],
            reaction_column: new_reactants + ">>" + new_products,
            "reactants": new_reactants,
            "products": new_products,
            charges_column: 0,  # Assuming the charge is neutralized
        }

    @staticmethod
    def fix_negative_charge(
        reaction_dict: Dict[str, Any],
        charges_column: str = "total_charge_in_products",
        id_column: str = "R-id",
        reaction_column: str = "reactions",
    ) -> Dict[str, Any]:
        """
        Adjusts a reaction dictionary to compensate for a negative charge
        in the products by adding [Na+] ions.

        The number of sodium ions equals the absolute value stored under
        `charges_column`; they are appended to both the reactants and the
        products.

        Parameters:
        - reaction_dict (Dict[str, Any]): A dictionary representing a chemical
        reaction. Must include the keys 'reactants', 'products',
        `charges_column` and `id_column`.
        - charges_column (str, optional): The key holding the total charge of
        the products. Defaults to 'total_charge_in_products'.
        - id_column (str, optional): The key holding the reaction identifier.
        Defaults to 'R-id'.
        - reaction_column (str, optional): The key under which the new reaction
        string is stored. Defaults to 'reactions'.

        Returns:
        - Dict[str, Any]: A new reaction dictionary containing only the
        identifier, the new reaction string, the adjusted 'reactants' and
        'products', and `charges_column` set to 0, assuming the charge has
        been neutralized.
        """
        return Neutralize._add_counter_ions(
            reaction_dict, "[Na+]", charges_column, id_column, reaction_column
        )

    @staticmethod
    def fix_positive_charge(
        reaction_dict: Dict[str, Any],
        charges_column: str = "total_charge_in_products",
        id_column: str = "R-id",
        reaction_column: str = "reactions",
    ) -> Dict[str, Any]:
        """
        Adjusts a reaction dictionary to compensate for a positive charge
        in the products by adding [Cl-] ions.

        The number of chloride ions equals the absolute value stored under
        `charges_column`; they are appended to both the reactants and the
        products.

        Parameters:
        - reaction_dict (Dict[str, Any]): A dictionary representing a chemical
        reaction. Must include the keys 'reactants', 'products',
        `charges_column` and `id_column`.
        - charges_column (str, optional): The key holding the total charge of
        the products. Defaults to 'total_charge_in_products'.
        - id_column (str, optional): The key holding the reaction identifier.
        Defaults to 'R-id'.
        - reaction_column (str, optional): The key under which the new reaction
        string is stored. Defaults to 'reactions'.

        Returns:
        - Dict[str, Any]: A new reaction dictionary containing only the
        identifier, the new reaction string, the adjusted 'reactants' and
        'products', and `charges_column` set to 0.
        """
        return Neutralize._add_counter_ions(
            reaction_dict, "[Cl-]", charges_column, id_column, reaction_column
        )

    @staticmethod
    def fix_unbalanced_charged(
        reaction_dict: Dict[str, Any],
        reaction_column: str,
    ) -> Dict[str, Any]:
        """
        Adjusts a reaction dictionary to compensate for an unbalanced charge in
        the products by adding either [Cl-] ions for a positive charge or [Na+]
        ions for a negative charge.

        The product charge is (re)computed with `calculate_charge_dict`, which
        updates `reaction_dict` in place.

        Parameters:
        - reaction_dict (Dict[str, Any]): A dictionary representing a chemical
        reaction, holding the reaction SMILES under `reaction_column`.
        - reaction_column (str): The key under which the reaction SMILES is
        stored.

        Returns:
        - Dict[str, Any]: For a non-zero product charge, the new dictionary
        built by `fix_positive_charge` / `fix_negative_charge`. For a zero
        charge, or when the reaction string cannot be parsed (charge is None),
        the input dictionary with the added 'reactants', 'products' and
        'total_charge_in_products' keys.

        Note:
        - NOTE(review): the fix helpers are called with their default
        `id_column` ('R-id') and `reaction_column` ('reactions'), so a corrected
        reaction requires an 'R-id' key and is returned under 'reactions'
        regardless of `reaction_column` - confirm this is intended.
        """
        reaction_dict = Neutralize.calculate_charge_dict(reaction_dict, reaction_column)
        total_charge = reaction_dict["total_charge_in_products"]

        # An unparsable reaction yields None; comparing None with 0 would raise
        # TypeError, so hand the annotated dictionary back untouched.
        if total_charge is None:
            return reaction_dict

        if total_charge > 0:
            return Neutralize.fix_positive_charge(
                reaction_dict, "total_charge_in_products"
            )
        if total_charge < 0:
            return Neutralize.fix_negative_charge(
                reaction_dict, "total_charge_in_products"
            )
        return reaction_dict

    @classmethod
    def parallel_fix_unbalanced_charge(
        cls,
        reaction_dicts: List[Dict[str, Any]],
        reaction_column: str,
        n_jobs: int = 4,
    ) -> List[Dict[str, Any]]:
        """
        Processes a list of reaction dictionaries in parallel to compensate
        for unbalanced charges in the products, adding either [Cl-] ions
        for positive charges or [Na+] ions for negative charges.

        Parameters:
        - reaction_dicts (List[Dict[str, Any]]): A list of dictionaries, each
        representing a chemical reaction that may have an unbalanced charge.
        - reaction_column (str): The key in each reaction dictionary that
        holds the reaction SMILES.
        - n_jobs (int): The number of joblib workers to use for parallel
        processing. Defaults to 4.

        Returns:
        - List[Dict[str, Any]]: One result of `fix_unbalanced_charged` per
        input dictionary, in input order.

        Note:
        - This function requires the joblib library for parallel execution.
        """
        # Use joblib.Parallel and joblib.delayed to parallelize the charge fixing
        fixed_reactions = Parallel(n_jobs=n_jobs)(
            delayed(cls.fix_unbalanced_charged)(reaction_dict, reaction_column)
            for reaction_dict in reaction_dicts
        )
        return fixed_reactions
|