synkit 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. synkit/Chem/Fingerprint/__init__.py +0 -0
  2. synkit/Chem/Fingerprint/fp_calculator.py +122 -0
  3. synkit/Chem/Fingerprint/smiles_featurizer.py +185 -0
  4. synkit/Chem/Fingerprint/transformation_fp.py +79 -0
  5. synkit/Chem/Molecule/__init__.py +0 -0
  6. synkit/Chem/Molecule/standardize.py +137 -0
  7. synkit/Chem/Reaction/__init__.py +0 -0
  8. synkit/Chem/Reaction/balance_check.py +162 -0
  9. synkit/Chem/Reaction/cleanning.py +59 -0
  10. synkit/Chem/Reaction/deionize.py +289 -0
  11. synkit/Chem/Reaction/neutralize.py +256 -0
  12. synkit/Chem/Reaction/reagent.py +102 -0
  13. synkit/Chem/Reaction/standardize.py +157 -0
  14. synkit/Chem/Reaction/tautomerize.py +168 -0
  15. synkit/Graph/Cluster/__init__.py +0 -0
  16. synkit/Graph/Cluster/morphism.py +83 -0
  17. synkit/Graph/Feature/__init__.py +0 -0
  18. synkit/Graph/Feature/graph_descriptors.py +325 -0
  19. synkit/Graph/Feature/graph_fps.py +97 -0
  20. synkit/Graph/Feature/graph_signature.py +236 -0
  21. synkit/Graph/Feature/hash_fps.py +130 -0
  22. synkit/Graph/Feature/morgan_fps.py +87 -0
  23. synkit/Graph/Feature/path_fps.py +82 -0
  24. synkit/Graph/__init.py +0 -0
  25. synkit/IO/__init__.py +0 -0
  26. synkit/IO/chem_converter.py +231 -0
  27. synkit/IO/data_io.py +277 -0
  28. synkit/IO/data_process.py +49 -0
  29. synkit/IO/debug.py +78 -0
  30. synkit/IO/dg_to_gml.py +124 -0
  31. synkit/IO/gml_to_nx.py +119 -0
  32. synkit/IO/graph_to_mol.py +110 -0
  33. synkit/IO/mol_to_graph.py +282 -0
  34. synkit/IO/nx_to_gml.py +200 -0
  35. synkit/IO/parse_rule.py +172 -0
  36. synkit/IO/smiles_to_id.py +119 -0
  37. synkit/ITS/_misc.py +280 -0
  38. synkit/ITS/aam_validator.py +254 -0
  39. synkit/ITS/its_builder.py +94 -0
  40. synkit/ITS/its_construction.py +213 -0
  41. synkit/ITS/normalize_aam.py +183 -0
  42. synkit/ITS/partial_expand.py +170 -0
  43. synkit/Reactor/__init__.py +0 -0
  44. synkit/Reactor/core_engine.py +164 -0
  45. synkit/Reactor/inference.py +73 -0
  46. synkit/Reactor/multi_step.py +227 -0
  47. synkit/Reactor/multi_step_aam.py +82 -0
  48. synkit/Reactor/reagent.py +95 -0
  49. synkit/Reactor/rule_apply.py +81 -0
  50. synkit/Vis/__init__.py +0 -0
  51. synkit/Vis/chemical_graph_visualizer.py +378 -0
  52. synkit/Vis/chemical_reaction_visualizer.py +133 -0
  53. synkit/Vis/chemical_space.py +83 -0
  54. synkit/Vis/embedding.py +92 -0
  55. synkit/Vis/graph_visualizer.py +286 -0
  56. synkit/Vis/pdf_writer.py +143 -0
  57. synkit/Vis/rsmi_to_fig.py +169 -0
  58. synkit/__init__.py +0 -0
  59. synkit/_misc.py +181 -0
  60. synkit-0.0.1.dist-info/METADATA +148 -0
  61. synkit-0.0.1.dist-info/RECORD +63 -0
  62. synkit-0.0.1.dist-info/WHEEL +4 -0
  63. synkit-0.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,227 @@
1
+ from typing import List, Dict, Tuple
2
+ from synkit.Chem.Reaction.standardize import Standardize
3
+ from synkit.Reactor.core_engine import CoreEngine
4
+
5
+ std = Standardize()
6
+
7
+
8
def perform_multi_step_reaction(
    gml_list: List[str], order: List[int], rsmi: str
) -> Tuple[List[List[str]], Dict[str, List[str]]]:
    """
    Apply a chain of reaction rules, step by step, starting from one reaction SMILES.

    At the first step the reactant side (left of ">>") of the input is fed to the
    rule; at every later step the product side (right of ">>") of each reaction
    produced by the previous step is used instead.

    Parameters:
    - gml_list (List[str]): Reaction rules in GML format.
    - order (List[int]): Indices into `gml_list`, one per step, in the order the
    rules are applied.
    - rsmi (str): The starting reaction SMILES string.

    Returns:
    - Tuple[List[List[str]], Dict[str, List[str]]]:
    - One list per step holding every standardized output produced at that step.
    - A dictionary mapping each input reaction SMILES that yielded at least one
    output to the list of outputs generated from it. A later occurrence of the
    same input overwrites the earlier entry.
    """
    engine = CoreEngine()
    outputs_by_input = {}
    steps: List[List[str]] = []

    # The frontier holds the reaction SMILES that feed the next step
    frontier: List[str] = [rsmi]

    for step_index, rule_index in enumerate(order):
        step_rule = gml_list[rule_index]
        # Reactant side (0) on the very first step, product side (1) afterwards
        side = 0 if step_index == 0 else 1
        next_frontier: List[str] = []

        for source_rsmi in frontier:
            molecules = source_rsmi.split(">>")[side].split(".")
            raw_outputs = engine.perform_reaction(step_rule, molecules)
            outputs = [std.fit(raw) for raw in raw_outputs]

            next_frontier.extend(outputs)
            # Only inputs that actually produced something are recorded
            if outputs:
                outputs_by_input[source_rsmi] = outputs

        frontier = next_frontier
        steps.append(frontier)

    return steps, outputs_by_input
83
+
84
+
85
def calculate_max_depth(reaction_tree, current_node=None, depth=0):
    """
    Calculate the maximum depth of a reaction tree.

    Parameters:
    - reaction_tree (dict): A dictionary where keys are reaction SMILES (RSMI)
    and values are lists of product reactions.
    - current_node (str): The current node in the tree being explored (reaction SMILES).
    When None, the first key of `reaction_tree` is used as the root.
    - depth (int): The current depth of the tree.

    Returns:
    - int: The maximum depth of the tree. An empty tree explored from the default
    root yields `depth` unchanged (0 by default).
    """
    if current_node is None:
        # An empty tree has no root to start from; report the depth reached so far
        # instead of failing on the first-key lookup.
        if not reaction_tree:
            return depth
        current_node = next(iter(reaction_tree))

    # Nodes that are not keys of the tree are treated as leaves
    products = reaction_tree.get(current_node, [])
    if not products:
        return depth

    # The depth of this subtree is the deepest of its children
    return max(
        calculate_max_depth(reaction_tree, product, depth + 1) for product in products
    )
114
+
115
+
116
+ # def find_all_paths(
117
+ # reaction_tree,
118
+ # target_products,
119
+ # current_node,
120
+ # target_depth,
121
+ # current_depth=0,
122
+ # path=None,
123
+ # ):
124
+ # """
125
+ # Recursively find all paths from the root to the maximum depth in the reaction tree.
126
+
127
+ # Parameters:
128
+ # - reaction_tree (dict): A dictionary of reaction SMILES with products.
129
+ # - current_node (str): The current node (reaction SMILES).
130
+ # - target_depth (int): The depth at which the product matches the root's product.
131
+ # - current_depth (int): The current depth of the search.
132
+ # - path (list): The current path in the tree.
133
+
134
+ # Returns:
135
+ # - List of all paths to the max depth.
136
+ # """
137
+ # if path is None:
138
+ # path = []
139
+
140
+ # # Add the current node (reaction SMILES) to the path
141
+ # path.append(current_node)
142
+
143
+ # # If we have reached the target depth, check the product
144
+ # if current_depth == target_depth:
145
+ # # Extract products of the current node
146
+ # products = sorted(current_node.split(">>")[1].split("."))
147
+ # return [path] if products == target_products else []
148
+
149
+ # # If we haven't reached the target depth, recurse on the products
150
+ # paths = []
151
+ # for product in reaction_tree.get(current_node, []):
152
+ # paths.extend(
153
+ # find_all_paths(
154
+ # reaction_tree,
155
+ # target_products,
156
+ # product,
157
+ # target_depth,
158
+ # current_depth + 1,
159
+ # path.copy(),
160
+ # )
161
+ # )
162
+
163
+ # return paths
164
+
165
+
166
def _largest_fragment(fragments):
    """
    Return the longest SMILES fragment, breaking length ties lexicographically.

    Accepts either an iterable of fragment strings or a single dot-separated
    SMILES string. Returns None when there are no fragments.
    """
    if isinstance(fragments, str):
        fragments = fragments.split(".") if fragments else []
    fragments = list(fragments)
    if not fragments:
        return None
    # Tie-breaking on the string itself makes the choice independent of the
    # order in which the fragments happen to be listed.
    return max(fragments, key=lambda smi: (len(smi), smi))


def find_all_paths(
    reaction_tree,
    target_products,
    current_node,
    target_depth,
    current_depth=0,
    path=None,
):
    """
    Recursively find all paths from the root to the target depth in the reaction tree
    whose final node has the same largest product as the target.

    Parameters:
    - reaction_tree (dict): A dictionary of reaction SMILES with products.
    - target_products (list or str): The target product SMILES, either as a list of
    fragments or as one dot-separated SMILES string.
    - current_node (str): The current node (reaction SMILES).
    - target_depth (int): The depth at which the product matches the root's product.
    - current_depth (int): The current depth of the search.
    - path (list): The path walked so far. It is copied, never modified in place.

    Returns:
    - List of all paths (each a list of reaction SMILES, root first) that reach
    `target_depth` and whose largest product equals the largest target product.
    """
    # Work on a private copy so a list supplied by the caller is left untouched
    path = [] if path is None else list(path)
    path.append(current_node)

    # At the target depth, decide whether this path ends in the wanted product
    if current_depth == target_depth:
        largest_current_product = _largest_fragment(
            current_node.split(">>")[1].split(".")
        )
        largest_target_product = _largest_fragment(target_products)
        return [path] if largest_current_product == largest_target_product else []

    # Otherwise keep descending through the products of this node
    paths = []
    for product in reaction_tree.get(current_node, []):
        paths.extend(
            find_all_paths(
                reaction_tree,
                target_products,
                product,
                target_depth,
                current_depth + 1,
                path,
            )
        )
    return paths
@@ -0,0 +1,82 @@
1
+ from typing import List, Optional, Dict
2
+ from synkit.Reactor.reagent import (
3
+ remove_reagent_from_smiles,
4
+ add_catalysis,
5
+ )
6
+ from synkit.Reactor.multi_step import (
7
+ perform_multi_step_reaction,
8
+ find_all_paths,
9
+ )
10
+ from synkit.Reactor.inference import aam_infer
11
+
12
+
13
def get_aam_reactions(
    list_reactions: List[str],
    rule: Dict[int, str],
    order: List[int],
    cat: Optional[str],
) -> List[Optional[str]]:
    """
    Infer one Atom-Atom Mapping (AAM) per reaction, retrying with a catalyst when
    the first attempt finds nothing.

    Parameters:
    - list_reactions (List[str]): Reaction SMILES strings; falsy entries are
    skipped and reported as None.
    - rule (Dict[int, str]): Rules for AAM inference, looked up by the values
    stored in `order`.
    - order (List[int]): For the reaction at position k, `order[k]` selects the
    rule to use.
    - cat (Optional[str]): Catalyst SMILES; None, empty or whitespace-only means
    no retry is attempted.

    Returns:
    - List[Optional[str]]: For each reaction, the first inferred AAM, or None
    when nothing could be inferred.
    """
    mappings = []

    for position, reaction in enumerate(list_reactions):
        if not reaction:
            # Keep the output aligned with the input
            mappings.append(None)
            continue

        cleaned = remove_reagent_from_smiles(reaction)
        step_rule = rule[order[position]]
        candidates = aam_infer(cleaned, step_rule)

        # Second chance: add the catalyst to both sides and infer again
        if len(candidates) == 0 and cat and cat.strip():
            cleaned = add_catalysis(cleaned, cat)
            candidates = aam_infer(cleaned, step_rule)

        mappings.append(candidates[0] if candidates else None)

    return mappings
55
+
56
+
57
def get_mechanism(gml: dict, order: List[int], rsmi: str, cat: str = None) -> List[str]:
    """
    Derive the atom-mapped steps of a multi-step reaction.

    The catalyst (if any) is added to the reaction, the rules are applied step by
    step, and the first path through the resulting reaction tree that ends in the
    target products is converted to Atom-Atom Mappings.

    Parameters:
    - gml: Collection of reaction rules; it is passed on as the rule list for the
    multi-step reaction and as the rule lookup for AAM inference, both indexed
    by the entries of `order`.
    - order (List[int]): The rule index to use at each step.
    - rsmi (str): Reaction SMILES string.
    - cat (str): Optional catalyst SMILES added to both sides of `rsmi`.

    Returns:
    - List[str]: The AAM of each step along the first matching path. If anything
    fails (including when no path is found) the error is printed and an empty
    list is returned.
    """
    try:
        catalysed = add_catalysis(rsmi, cat)
        step_results, tree = perform_multi_step_reaction(gml, order, catalysed)

        product_side = catalysed.split(">>")[1]
        wanted_products = sorted(product_side.split("."))
        depth = len(step_results)

        candidate_paths = find_all_paths(tree, wanted_products, catalysed, depth)
        # The first element of a path is the starting reaction itself
        steps_only = candidate_paths[0][1:]
        return get_aam_reactions(steps_only, gml, order, cat)
    except Exception as e:
        print(e)
        return []
@@ -0,0 +1,95 @@
1
+ from collections import Counter
2
+ from synkit.Chem.Reaction.standardize import Standardize
3
+
4
+ std = Standardize()
5
+
6
+
7
def remove_reagent_from_smiles(rsmi: str) -> str:
    """
    Strip molecules that occur unchanged on both sides of a reaction SMILES.

    Each molecule present among both the reactants and the products is cancelled
    pairwise: as many copies are dropped from each side as the side with fewer
    copies contains. Remaining molecules are emitted grouped by molecule, in order
    of first appearance on their side.

    Parameters:
    - rsmi (str): A reaction SMILES string of the form
    'reactant1.reactant2...>>product1.product2...'

    Returns:
    - str: The reaction SMILES with the shared molecules removed, in the form
    'reactant1.reactant2...>>product1.product2...'

    Example:
    >>> remove_reagent_from_smiles('CC=O.CC=O.CCC=O>>CC=CO.CC=O.CC=O')
    'CCC=O>>CC=CO'
    """
    left, right = rsmi.split(">>")

    left_counts = Counter(left.split("."))
    right_counts = Counter(right.split("."))

    # Counter subtraction keeps only positive remainders, which is exactly the
    # "remove the common minimum from both sides" rule.
    leftover_reactants = left_counts - right_counts
    leftover_products = right_counts - left_counts

    new_left = ".".join(leftover_reactants.elements())
    new_right = ".".join(leftover_products.elements())
    return f"{new_left}>>{new_right}"
64
+
65
+
66
def add_catalysis(reaction_smiles, catalyst_smiles):
    """
    Adds a catalyst to both the reactant and product sides of a reaction SMILES string.
    If the catalyst is None, empty, or only whitespace, the function returns the
    original reaction SMILES string.

    Parameters:
    - reaction_smiles (str): The SMILES string of the reaction ("reactants>>products").
    - catalyst_smiles (str): The SMILES string of the catalyst,
    which can be None or empty. Surrounding whitespace is ignored.

    Returns:
    - str: Modified reaction SMILES string with the catalyst included on both sides,
    or the original if no valid catalyst is provided.

    Raises:
    - ValueError: If `reaction_smiles` does not contain exactly one ">>" separator.
    """
    if catalyst_smiles is None:
        return reaction_smiles

    # A blank catalyst is treated the same as no catalyst
    catalyst = catalyst_smiles.strip()
    if not catalyst:
        return reaction_smiles

    reactants, products = reaction_smiles.split(">>")

    # Skip an empty side when joining so the result never starts with a stray "."
    reactants_with_cat = ".".join(part for part in (reactants, catalyst) if part)
    products_with_cat = ".".join(part for part in (products, catalyst) if part)

    return f"{reactants_with_cat}>>{products_with_cat}"
@@ -0,0 +1,81 @@
1
+ import os
2
+ import torch
3
+ from typing import List
4
+ from synkit.IO.debug import setup_logging
5
+
6
+ from mod import smiles, ruleGMLString, DG, config
7
+
8
+ logger = setup_logging()
9
+
10
+
11
def deduplicateGraphs(initial):
    """
    Replace isomorphic graphs by a single shared object, keeping multiplicity.

    Each graph is compared (via its `isomorphism` method) against the graphs
    already collected. If one of them is isomorphic, that earlier object is
    appended again in place of the new one; otherwise the new graph is appended.
    The returned list therefore has the same length as `initial`.
    """
    not_found = object()
    collected = []
    for graph in initial:
        # First already-collected graph that is isomorphic to this one, if any
        earlier = next(
            (known for known in collected if graph.isomorphism(known) != 0),
            not_found,
        )
        collected.append(graph if earlier is not_found else earlier)
    return collected
22
+
23
+
24
def rule_apply(
    smiles_list: List[str], rule: str, verbose: int = 0, print_output: bool = False
) -> DG:
    """
    Build a derivation graph by applying one GML reaction rule to a set of molecules.

    The SMILES strings are loaded as graphs, isomorphic copies are collapsed onto a
    shared object, the graphs are ordered by ascending vertex count, and the rule
    is then applied to them inside a new derivation graph.

    Parameters:
    - smiles_list (List[str]): SMILES strings of the molecules the rule is
    applied to.
    - rule (str): The reaction rule as a GML string.
    - verbose (int, optional): Verbosity level passed to the rule application.
    Default is 0.
    - print_output (bool, optional): If True, the "out" directory is created and
    the derivation graph is printed. Default is False.

    Returns:
    - DG: The derivation graph obtained after applying the rule.

    Raises:
    - Exception: Any error raised along the way is logged and re-raised.
    """
    try:
        graphs = deduplicateGraphs([smiles(smi, add=False) for smi in smiles_list])

        # Smallest molecules first
        graphs = sorted(graphs, key=lambda graph: graph.numVertices)

        parsed_rule = ruleGMLString(rule)

        derivation_graph = DG(graphDatabase=graphs)
        # NOTE: this changes a process-wide `mod` configuration flag
        config.dg.doRuleIsomorphismDuringBinding = False
        builder = derivation_graph.build()
        builder.apply(graphs, parsed_rule, verbosity=verbose)

        if print_output:
            os.makedirs("out", exist_ok=True)
            derivation_graph.print()

        return derivation_graph

    except Exception as e:
        logger.error(f"An error occurred: {e}")
        raise
synkit/Vis/__init__.py ADDED
File without changes