synkit 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synkit/Chem/Fingerprint/__init__.py +0 -0
- synkit/Chem/Fingerprint/fp_calculator.py +122 -0
- synkit/Chem/Fingerprint/smiles_featurizer.py +185 -0
- synkit/Chem/Fingerprint/transformation_fp.py +79 -0
- synkit/Chem/Molecule/__init__.py +0 -0
- synkit/Chem/Molecule/standardize.py +137 -0
- synkit/Chem/Reaction/__init__.py +0 -0
- synkit/Chem/Reaction/balance_check.py +162 -0
- synkit/Chem/Reaction/cleanning.py +59 -0
- synkit/Chem/Reaction/deionize.py +289 -0
- synkit/Chem/Reaction/neutralize.py +256 -0
- synkit/Chem/Reaction/reagent.py +102 -0
- synkit/Chem/Reaction/standardize.py +157 -0
- synkit/Chem/Reaction/tautomerize.py +168 -0
- synkit/Graph/Cluster/__init__.py +0 -0
- synkit/Graph/Cluster/morphism.py +83 -0
- synkit/Graph/Feature/__init__.py +0 -0
- synkit/Graph/Feature/graph_descriptors.py +325 -0
- synkit/Graph/Feature/graph_fps.py +97 -0
- synkit/Graph/Feature/graph_signature.py +236 -0
- synkit/Graph/Feature/hash_fps.py +130 -0
- synkit/Graph/Feature/morgan_fps.py +87 -0
- synkit/Graph/Feature/path_fps.py +82 -0
- synkit/Graph/__init.py +0 -0
- synkit/IO/__init__.py +0 -0
- synkit/IO/chem_converter.py +231 -0
- synkit/IO/data_io.py +277 -0
- synkit/IO/data_process.py +49 -0
- synkit/IO/debug.py +78 -0
- synkit/IO/dg_to_gml.py +124 -0
- synkit/IO/gml_to_nx.py +119 -0
- synkit/IO/graph_to_mol.py +110 -0
- synkit/IO/mol_to_graph.py +282 -0
- synkit/IO/nx_to_gml.py +200 -0
- synkit/IO/parse_rule.py +172 -0
- synkit/IO/smiles_to_id.py +119 -0
- synkit/ITS/_misc.py +280 -0
- synkit/ITS/aam_validator.py +254 -0
- synkit/ITS/its_builder.py +94 -0
- synkit/ITS/its_construction.py +213 -0
- synkit/ITS/normalize_aam.py +183 -0
- synkit/ITS/partial_expand.py +170 -0
- synkit/Reactor/__init__.py +0 -0
- synkit/Reactor/core_engine.py +164 -0
- synkit/Reactor/inference.py +73 -0
- synkit/Reactor/multi_step.py +227 -0
- synkit/Reactor/multi_step_aam.py +82 -0
- synkit/Reactor/reagent.py +95 -0
- synkit/Reactor/rule_apply.py +81 -0
- synkit/Vis/__init__.py +0 -0
- synkit/Vis/chemical_graph_visualizer.py +378 -0
- synkit/Vis/chemical_reaction_visualizer.py +133 -0
- synkit/Vis/chemical_space.py +83 -0
- synkit/Vis/embedding.py +92 -0
- synkit/Vis/graph_visualizer.py +286 -0
- synkit/Vis/pdf_writer.py +143 -0
- synkit/Vis/rsmi_to_fig.py +169 -0
- synkit/__init__.py +0 -0
- synkit/_misc.py +181 -0
- synkit-0.0.1.dist-info/METADATA +148 -0
- synkit-0.0.1.dist-info/RECORD +63 -0
- synkit-0.0.1.dist-info/WHEEL +4 -0
- synkit-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from rxnmapper import RXNMapper
|
|
2
|
+
from typing import Tuple, Optional
|
|
3
|
+
from synkit.IO.debug import setup_logging
|
|
4
|
+
|
|
5
|
+
logger = setup_logging()
|
|
6
|
+
rxn_mapper = RXNMapper()
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Reagent:
    """Static helpers that atom-map a reaction and split off its reagents."""

    def __init__(self) -> None:
        pass

    @staticmethod
    def map_with_rxn_mapper(
        reaction_smiles: str, rxn_mapper: RXNMapper = rxn_mapper
    ) -> str:
        """
        Atom-maps a single reaction SMILES with the supplied RXNMapper.

        Parameters:
        - reaction_smiles (str): Reaction SMILES to map.
        - rxn_mapper (RXNMapper): Mapper to use; defaults to the module-level
        instance.

        Returns:
        - str: The mapped reaction SMILES (text after the first space, if any,
        is dropped). On any exception the error is logged and the input
        string is returned unchanged.
        """
        try:
            # The mapper works on batches; submit a one-element batch.
            batch_result = rxn_mapper.get_attention_guided_atom_maps(
                [reaction_smiles], canonicalize_rxns=False
            )
            mapped = batch_result[0]["mapped_rxn"]
            if " " not in mapped:
                return mapped
            # Keep only the leading token when extra text follows the SMILES.
            return mapped.split(" ")[0]
        except Exception as exc:
            logger.error(
                f"RXNMapper mapping failed for reaction '{reaction_smiles}': {exc}"
            )
            return reaction_smiles

    @staticmethod
    def clean_reagents(
        reaction_smiles: str, return_clean_reaction: bool = True
    ) -> Tuple[Optional[str], Optional[str]]:
        """
        Separates reagents from a reaction using the atom-mapped form of it.

        A '.'-separated fragment is classified as a reagent when its text also
        appears verbatim among the fragments of the same side of the mapped
        reaction; every other fragment is kept in the cleaned reaction.

        Parameters:
        - reaction_smiles (str): The reaction in SMILES format.
        - return_clean_reaction (bool): If True, the cleaned reaction is returned
        as the first tuple element; if False, that element is None.

        Returns:
        - Tuple[Optional[str], Optional[str]]: (cleaned reaction, reagents joined
        by '.'). The reagents element is None when none were detected, the
        cleaned reaction is None when nothing is left on either side, and both
        are None if any exception occurs (the error is logged).
        """
        try:
            mapped_rsmi = Reagent.map_with_rxn_mapper(reaction_smiles)

            raw_lhs, raw_rhs = reaction_smiles.split(">>")
            mapped_lhs, mapped_rhs = mapped_rsmi.split(">>")

            reagents = []
            kept_per_side = []
            # Reactant side first, then product side, so reagent order matches
            # the order in which fragments occur in the input.
            for raw_side, mapped_side in ((raw_lhs, mapped_lhs), (raw_rhs, mapped_rhs)):
                mapped_fragments = mapped_side.split(".")
                kept = []
                for fragment in raw_side.split("."):
                    if fragment in mapped_fragments:
                        reagents.append(fragment)
                    else:
                        kept.append(fragment)
                kept_per_side.append(kept)
            clean_reactants, clean_products = kept_per_side

            reagents_str = ".".join(reagents) if reagents else None

            clean_reaction = None
            if clean_reactants or clean_products:
                clean_reaction = (
                    ".".join(clean_reactants) + ">>" + ".".join(clean_products)
                )

            if not return_clean_reaction:
                return None, reagents_str
            return clean_reaction, reagents_str

        except Exception as exc:
            logger.error(f"Error processing reaction: {exc}")
            return None, None
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
from rdkit import Chem
|
|
2
|
+
from typing import List, Optional, Tuple
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class Standardize:
    """Utilities for stripping atom maps from and canonicalising reaction SMILES."""

    def __init__(self) -> None:
        pass

    @staticmethod
    def remove_atom_mapping(reaction_smiles: str, symbol: str = ">>") -> str:
        """
        Removes atom mappings from both reactants and products in a reaction SMILES.

        Each side is parsed as a single RDKit molecule (all '.'-separated
        components together), its atom-map numbers are reset to 0, and it is
        written back as SMILES.

        Parameters:
        - reaction_smiles (str): A reaction SMILES string with atom mappings.
        - symbol (str): The symbol that separates reactants and products.
        Default is '>>'.

        Returns:
        - str: The reaction SMILES with atom mappings removed.

        Raises:
        - ValueError: If the string does not split into exactly two parts on
        `symbol`, or if either side cannot be parsed by RDKit.
        """
        parts = reaction_smiles.split(symbol)
        if len(parts) != 2:
            raise ValueError(
                "Invalid reaction SMILES format."
                + " Expected format: 'reactants>>products'."
            )

        def clean_smiles(smiles: str) -> str:
            mol = Chem.MolFromSmiles(smiles)
            if mol is None:
                raise ValueError(f"Invalid SMILES string: {smiles}")
            for atom in mol.GetAtoms():
                atom.SetAtomMapNum(0)  # 0 means "no atom map"
            return Chem.MolToSmiles(mol, True)

        reactants_clean = clean_smiles(parts[0])
        products_clean = clean_smiles(parts[1])
        return f"{reactants_clean}{symbol}{products_clean}"

    @staticmethod
    def filter_valid_molecules(smiles_list: List[str]) -> "List[Chem.Mol]":
        """
        Filters and returns valid RDKit molecule objects from a list of SMILES strings.

        Parameters:
        - smiles_list (List[str]): A list of SMILES strings.

        Returns:
        - List[Chem.Mol]: The molecules RDKit could parse, in input order;
        unparsable entries are silently dropped.
        """
        # The return annotation is quoted so it is not evaluated when the
        # function is defined.
        parsed = (Chem.MolFromSmiles(smiles) for smiles in smiles_list)
        return [mol for mol in parsed if mol]

    @staticmethod
    def standardize_rsmi(rsmi: str, stereo: bool = False) -> Optional[str]:
        """
        Standardizes a reaction SMILES (rSMI) by:
        - Dropping components that RDKit cannot parse.
        - Sorting the SMILES strings of reactants and products in ascending order.
        - Optionally considering stereochemistry.

        Parameters:
        - rsmi (str): The reaction SMILES string to be standardized.
        - stereo (bool): If True, stereochemical information is included in the SMILES.

        Returns:
        - Optional[str]: The standardized reaction SMILES,
        or None if either side has no valid molecule.

        Raises:
        - ValueError: If `rsmi` does not split into exactly two parts on '>>'.
        """
        try:
            reactants, products = rsmi.split(">>")
        except ValueError as err:
            raise ValueError(
                "Invalid reaction SMILES format."
                + " Expected format: 'reactants>>products'."
            ) from err

        reactant_molecules = Standardize.filter_valid_molecules(reactants.split("."))
        product_molecules = Standardize.filter_valid_molecules(products.split("."))

        if not reactant_molecules or not product_molecules:
            return None

        standardized_reactants = ".".join(
            sorted(
                Chem.MolToSmiles(mol, isomericSmiles=stereo)
                for mol in reactant_molecules
            )
        )
        standardized_products = ".".join(
            sorted(
                Chem.MolToSmiles(mol, isomericSmiles=stereo)
                for mol in product_molecules
            )
        )

        return f"{standardized_reactants}>>{standardized_products}"

    def fit(
        self, rsmi: str, remove_aam: bool = True, ignore_stereo: bool = True
    ) -> Optional[str]:
        """
        Fits the reaction SMILES by removing atom mappings and standardizing the reaction.

        Parameters:
        - rsmi (str): The reaction SMILES string to be processed.
        - remove_aam (bool): If True, atom mappings are removed from the reaction SMILES.
        - ignore_stereo (bool): If True, stereochemistry is ignored in the SMILES.

        Returns:
        - Optional[str]: The processed reaction SMILES (with '[HH]' rewritten as
        '[H][H]'), or None if the standardization fails.

        Raises:
        - ValueError: Propagated from `remove_atom_mapping` / `standardize_rsmi`
        for malformed input.
        """
        if remove_aam:
            rsmi = self.remove_atom_mapping(rsmi)

        standardized = self.standardize_rsmi(rsmi, not ignore_stereo)
        if standardized is None:
            # standardize_rsmi found no valid molecule on one side; report the
            # failure as None instead of failing on None.replace(...).
            return None
        return standardized.replace("[HH]", "[H][H]")

    @staticmethod
    def categorize_reactions(
        reactions: List[str], target_reaction: str
    ) -> Tuple[List[str], List[str]]:
        """
        Sorts a list of reaction SMILES strings into two groups based on
        their match with a specified target reaction.

        Only the target is standardized (stereo ignored); each entry of
        `reactions` is compared to it verbatim, so callers should pass
        reactions that are already in standardized form.

        Parameters:
        - reactions (List[str]): The array of reaction SMILES strings to be categorized.
        - target_reaction (str): The SMILES string of the target reaction
        used as the benchmark for categorization.

        Returns:
        - Tuple[List[str], List[str]]: A pair of lists, where the first contains
        reactions matching the target and the second
        comprises non-matching reactions.
        """
        match, not_match = [], []
        target_reaction = Standardize.standardize_rsmi(target_reaction, stereo=False)
        for reaction_smiles in reactions:
            bucket = match if reaction_smiles == target_reaction else not_match
            bucket.append(reaction_smiles)
        return match, not_match
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
from typing import List, Dict, Optional
|
|
2
|
+
from rdkit import Chem
|
|
3
|
+
from fgutils import FGQuery
|
|
4
|
+
from joblib import Parallel, delayed
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Tautomerize:
    """
    A class to standardize molecules by converting specific functional groups to their
    more common forms using RDKit for molecule manipulation.
    """

    @staticmethod
    def standardize_enol(smiles: str, atom_indices: Optional[List[int]] = None) -> str:
        """
        Converts an enol form to a carbonyl form based on specified atom indices.

        Parameters:
        - smiles (str): The SMILES string.
        - atom_indices (List[int], optional): List containing indices of two carbons and
        one oxygen involved in the enol formation. Defaults to [0, 1, 2].

        Returns:
        - str: The SMILES string of the molecule after conversion.
        On failure an error message is returned instead of raising.
        """
        if atom_indices is None:
            atom_indices = [0, 1, 2]

        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return "Invalid SMILES format."
        emol = Chem.EditableMol(mol)

        try:
            # Unpacking fails unless exactly two of the indices are carbons.
            c1_idx, c2_idx = (
                i for i in atom_indices if mol.GetAtomWithIdx(i).GetSymbol() == "C"
            )
            o_idx = next(
                i for i in atom_indices if mol.GetAtomWithIdx(i).GetSymbol() == "O"
            )
        except Exception as e:
            return f"Error processing indices: {str(e)}"

        try:
            # NOTE(review): this assumes the second carbon listed is the one
            # bonded to the oxygen -- confirm against the index order produced
            # by the caller.
            emol.RemoveBond(c1_idx, c2_idx)
            emol.RemoveBond(c2_idx, o_idx)
            emol.AddBond(c1_idx, c2_idx, order=Chem.rdchem.BondType.SINGLE)
            emol.AddBond(c2_idx, o_idx, order=Chem.rdchem.BondType.DOUBLE)
            new_mol = emol.GetMol()
            Chem.SanitizeMol(new_mol)
            return Chem.MolToSmiles(new_mol)
        except Exception as e:
            return f"Error in modifying molecule: {str(e)}"

    @staticmethod
    def standardize_hemiketal(smiles: str, atom_indices: List[int]) -> str:
        """
        Converts a hemiketal form to a carbonyl form based on specified atom indices.

        Both C-O bonds are removed and a C=O double bond is added to the first
        oxygen listed; the second oxygen is left detached from that carbon.

        Parameters:
        - smiles (str): SMILES representation of the original molecule.
        - atom_indices (List[int]): Indices of the carbon and two oxygen atoms
        involved in the transformation.

        Returns:
        - str: SMILES string of the modified molecule if successful,
        otherwise returns an error message.
        """
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return "Invalid SMILES format."
        emol = Chem.EditableMol(mol)

        try:
            c_idx = next(
                i for i in atom_indices if mol.GetAtomWithIdx(i).GetSymbol() == "C"
            )
            # Unpacking fails unless exactly two of the indices are oxygens.
            o1_idx, o2_idx = (
                i for i in atom_indices if mol.GetAtomWithIdx(i).GetSymbol() == "O"
            )
            # NOTE(review): which oxygen becomes the carbonyl depends purely on
            # index order -- confirm the caller lists the hydroxyl oxygen first.
            emol.RemoveBond(c_idx, o1_idx)
            emol.RemoveBond(c_idx, o2_idx)
            emol.AddBond(c_idx, o1_idx, order=Chem.rdchem.BondType.DOUBLE)
            new_mol = emol.GetMol()
            Chem.SanitizeMol(new_mol)
            return Chem.MolToSmiles(new_mol)
        except Exception as e:
            return f"Error in modifying molecule: {str(e)}"

    @staticmethod
    def fix_smiles(smiles: str) -> str:
        """
        Performs the standardization process by identifying and converting all relevant
        functional groups to their target forms based on predefined rules and updates the
        SMILES string accordingly.

        After every successful conversion the functional groups are queried
        again on the new SMILES, so atom indices always refer to the string
        currently being edited. A conversion that fails (the helper returned an
        error message) or that yields a SMILES already produced earlier is
        skipped.

        Parameters:
        - smiles (str): SMILES string of the original molecule.

        Returns:
        - str: Canonical SMILES string of the standardized molecule.
        """
        query = FGQuery()
        # SMILES already visited; guarantees the rescan loop cannot cycle.
        seen = {smiles}
        rewritten = True
        while rewritten:
            rewritten = False
            # presumably each item is a (group_name, atom_indices) tuple --
            # the code only relies on `name in item` and `item[1]`.
            for item in query.get(smiles):
                if "hemiketal" in item:
                    candidate = Tautomerize.standardize_hemiketal(smiles, item[1])
                elif "enol" in item:
                    candidate = Tautomerize.standardize_enol(smiles, item[1])
                else:
                    continue
                # The helpers report failure by returning a message rather
                # than raising; every such message contains a space, which a
                # SMILES written by MolToSmiles does not.
                if " " in candidate or candidate in seen:
                    continue
                seen.add(candidate)
                smiles = candidate
                rewritten = True
                # Indices of the remaining items refer to the old SMILES;
                # restart with a fresh query.
                break
        return Chem.CanonSmiles(smiles)

    @staticmethod
    def fix_dict(data: Dict[str, str], reaction_column: str) -> Dict[str, str]:
        """
        Updates a dictionary containing reaction data by
        standardizing the SMILES strings of reactants and products.

        The dictionary is modified in place and also returned.

        Parameters:
        - data (Dict[str, str]): Dictionary containing the reaction data.
        - reaction_column (str): The key in the dictionary where the reaction SMILES
        string is stored.

        Returns:
        - Dict[str, str]: The updated dictionary with standardized SMILES strings.
        """
        try:
            reactants, products = data[reaction_column].split(">>")
            reactants = Tautomerize.fix_smiles(reactants)
            products = Tautomerize.fix_smiles(products)
            data[reaction_column] = f"{reactants}>>{products}"
        except ValueError:
            # Not a two-sided 'reactants>>products' string: treat the value as
            # a plain molecule SMILES.
            smiles = data[reaction_column]
            smiles = Tautomerize.fix_smiles(smiles)
            data[reaction_column] = smiles
        return data

    @staticmethod
    def fix_dicts(
        data: List[Dict[str, str]],
        reaction_column: str,
        n_jobs: int = 4,
        verbose: int = 0,
    ) -> List[Dict[str, str]]:
        """
        Standardizes multiple dictionaries containing
        reaction data in parallel.

        Parameters:
        - data (List[Dict[str, str]]): List of dictionaries, each containing reaction
        data.
        - reaction_column (str): The key where the reaction SMILES strings are
        stored in each dictionary.
        - n_jobs (int, optional): Number of jobs to run in parallel. Defaults to 4.
        - verbose (int, optional): The verbosity level. Defaults to 0.

        Returns:
        - List[Dict[str, str]]: A list of updated dictionaries
        with standardized SMILES strings.
        """
        results = Parallel(n_jobs=n_jobs, verbose=verbose)(
            delayed(Tautomerize.fix_dict)(d, reaction_column) for d in data
        )
        return results
|
|
File without changes
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
from operator import eq
|
|
3
|
+
import networkx as nx
|
|
4
|
+
from typing import Callable, Optional
|
|
5
|
+
from networkx.algorithms.isomorphism import generic_node_match, generic_edge_match
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def graph_isomorphism(
    graph_1: nx.Graph,
    graph_2: nx.Graph,
    node_match: Optional[Callable] = None,
    edge_match: Optional[Callable] = None,
    use_defaults: bool = False,
) -> bool:
    """
    Determines if two graphs are isomorphic, considering provided node and edge
    matching functions.

    Parameters:
    - graph_1 (nx.Graph): The first graph to compare.
    - graph_2 (nx.Graph): The second graph to compare.
    - node_match (Optional[Callable]): The function used to match nodes.
    - edge_match (Optional[Callable]): The function used to match edges.
    - use_defaults (bool): If True, any matcher left as None is replaced by a
    default: nodes are compared on the 'element' (default '*') and 'charge'
    (default 0) attributes, edges on the 'order' attribute (default 1).
    If False, matchers left as None are passed to NetworkX unchanged.

    Returns:
    - bool: True if the graphs are isomorphic, False otherwise.
    """
    if use_defaults:
        # Build each default matcher only where the caller supplied none, so
        # an explicit matcher is never overridden.
        if node_match is None:
            node_label_names = ["element", "charge"]
            node_label_default = ["*", 0]
            node_match = generic_node_match(
                node_label_names, node_label_default, [eq] * len(node_label_names)
            )
        if edge_match is None:
            edge_match = generic_edge_match("order", 1, eq)

    return nx.is_isomorphic(
        graph_1, graph_2, node_match=node_match, edge_match=edge_match
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def rule_isomorphism(
    rule_1: str, rule_2: str, morphism_type: str = "isomorphic"
) -> bool:
    """
    Evaluates if two GML-formatted rule representations are isomorphic or one is a
    subgraph of the other.

    Converts GML strings to `ruleGMLString` objects and uses these to check for:
    - 'isomorphic': Complete structural correspondence between both rules.
    - 'monomorphic': One rule being a subgraph of the other.

    Parameters:
    - rule_1 (str): GML string of the first rule.
    - rule_2 (str): GML string of the second rule.
    - morphism_type (str, optional): Type of morphism to check
    ('isomorphic' or 'monomorphic'). Any value other than 'isomorphic'
    selects the monomorphism check.

    Returns:
    - bool: True if the specified morphism condition is met, False otherwise.

    Raises:
    - Exception: Issues during GML parsing or morphism checking.
    """
    # Imported lazily so the module can be imported without `mod` installed.
    from mod import ruleGMLString

    # add=False keeps these temporary rules out of mod's global list of input
    # rules, which would otherwise grow by two entries on every comparison.
    rule_obj_1 = ruleGMLString(rule_1, add=False)
    rule_obj_2 = ruleGMLString(rule_2, add=False)

    if morphism_type == "isomorphic":
        return rule_obj_1.isomorphism(rule_obj_2) == 1
    return rule_obj_1.monomorphism(rule_obj_2) == 1
|
|
File without changes
|