PyPI - synkit - Versions diffs - 0.0.1__py3-none-any.whl - Mend

synkit 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

synkit/Chem/Fingerprint/__init__.py +0 -0
synkit/Chem/Fingerprint/fp_calculator.py +122 -0
synkit/Chem/Fingerprint/smiles_featurizer.py +185 -0
synkit/Chem/Fingerprint/transformation_fp.py +79 -0
synkit/Chem/Molecule/__init__.py +0 -0
synkit/Chem/Molecule/standardize.py +137 -0
synkit/Chem/Reaction/__init__.py +0 -0
synkit/Chem/Reaction/balance_check.py +162 -0
synkit/Chem/Reaction/cleanning.py +59 -0
synkit/Chem/Reaction/deionize.py +289 -0
synkit/Chem/Reaction/neutralize.py +256 -0
synkit/Chem/Reaction/reagent.py +102 -0
synkit/Chem/Reaction/standardize.py +157 -0
synkit/Chem/Reaction/tautomerize.py +168 -0
synkit/Graph/Cluster/__init__.py +0 -0
synkit/Graph/Cluster/morphism.py +83 -0
synkit/Graph/Feature/__init__.py +0 -0
synkit/Graph/Feature/graph_descriptors.py +325 -0
synkit/Graph/Feature/graph_fps.py +97 -0
synkit/Graph/Feature/graph_signature.py +236 -0
synkit/Graph/Feature/hash_fps.py +130 -0
synkit/Graph/Feature/morgan_fps.py +87 -0
synkit/Graph/Feature/path_fps.py +82 -0
synkit/Graph/__init.py +0 -0
synkit/IO/__init__.py +0 -0
synkit/IO/chem_converter.py +231 -0
synkit/IO/data_io.py +277 -0
synkit/IO/data_process.py +49 -0
synkit/IO/debug.py +78 -0
synkit/IO/dg_to_gml.py +124 -0
synkit/IO/gml_to_nx.py +119 -0
synkit/IO/graph_to_mol.py +110 -0
synkit/IO/mol_to_graph.py +282 -0
synkit/IO/nx_to_gml.py +200 -0
synkit/IO/parse_rule.py +172 -0
synkit/IO/smiles_to_id.py +119 -0
synkit/ITS/_misc.py +280 -0
synkit/ITS/aam_validator.py +254 -0
synkit/ITS/its_builder.py +94 -0
synkit/ITS/its_construction.py +213 -0
synkit/ITS/normalize_aam.py +183 -0
synkit/ITS/partial_expand.py +170 -0
synkit/Reactor/__init__.py +0 -0
synkit/Reactor/core_engine.py +164 -0
synkit/Reactor/inference.py +73 -0
synkit/Reactor/multi_step.py +227 -0
synkit/Reactor/multi_step_aam.py +82 -0
synkit/Reactor/reagent.py +95 -0
synkit/Reactor/rule_apply.py +81 -0
synkit/Vis/__init__.py +0 -0
synkit/Vis/chemical_graph_visualizer.py +378 -0
synkit/Vis/chemical_reaction_visualizer.py +133 -0
synkit/Vis/chemical_space.py +83 -0
synkit/Vis/embedding.py +92 -0
synkit/Vis/graph_visualizer.py +286 -0
synkit/Vis/pdf_writer.py +143 -0
synkit/Vis/rsmi_to_fig.py +169 -0
synkit/__init__.py +0 -0
synkit/_misc.py +181 -0
synkit-0.0.1.dist-info/METADATA +148 -0
synkit-0.0.1.dist-info/RECORD +63 -0
synkit-0.0.1.dist-info/WHEEL +4 -0
synkit-0.0.1.dist-info/licenses/LICENSE +21 -0

synkit/Reactor/multi_step.py ADDED Viewed

@@ -0,0 +1,227 @@
+from typing import List, Dict, Tuple
+from synkit.Chem.Reaction.standardize import Standardize
+from synkit.Reactor.core_engine import CoreEngine
+# Module-level standardizer shared by all calls; perform_multi_step_reaction applies
+# its `fit` method to every outcome returned by the reaction engine.
+std = Standardize()
+def perform_multi_step_reaction(
+    gml_list: List[str], order: List[int], rsmi: str
+) -> Tuple[List[List[str]], Dict[str, List[str]]]:
+    """
+    Applies a sequence of multi-step reactions to a starting SMILES string. The function
+    processes each reaction step in a specified order, and returns both the intermediate
+    and final products, as well as a mapping of reactant SMILES to their
+    corresponding products.
+    Parameters:
+    - gml_list (List[str]): A list of reaction rules (in GML format) to be applied.
+    Each element corresponds to a reaction step.
+    - order (List[int]): A list of integers that defines the order in which the
+    reaction steps should be applied. Each integer is an index referring to the position
+    of a reaction rule in the `gml_list`.
+    - rsmi (str): The starting reaction SMILES string, representing the reactants for the
+    first reaction.
+    Returns:
+    - Tuple[List[List[str]], Dict[str, List[str]]]:
+        - A list of lists of SMILES strings, where each inner list contains the
+        RSMI generated  at each reaction step.
+        - A dictionary mapping each RSMI string to the resulting products after applying
+          the reaction rules. The keys are the input RSMIs, and the values are the
+          resulting product  SMILES strings.
+    """
+    # Initialize CoreEngine for reaction processing
+    core = CoreEngine()
+    # Initialize a dictionary to hold reaction results
+    reaction_results = {}
+    # List to store the results of each reaction step
+    all_steps: List[List[str]] = []
+    result: List[str] = [rsmi]  # Initial result is the input SMILES string
+    # Loop over the reaction steps in the specified order
+    for i, j in enumerate(order):
+        # Get the reaction SMILES (RSMI) for the current step
+        current_step_gml = gml_list[j]
+        new_result: List[str] = []  # List to hold products for this step
+        # Apply the reaction for each current reactant SMILES
+        for current_rsmi in result:
+            smi_lst = (
+                current_rsmi.split(">>")[0].split(
+                    "."
+                )  # Split reactants at the first step
+                if i == 0
+                else current_rsmi.split(">>")[1].split(
+                    "."
+                )  # Split products for subsequent steps
+            )
+            # Perform the reaction using the CoreEngine
+            o = core.perform_reaction(current_step_gml, smi_lst)
+            # Apply standardization on the products
+            o = [std.fit(i) for i in o]
+            # Collect the new results (products) from this reaction step
+            new_result.extend(o)
+            # Record the reaction results in the dictionary, mapping input RSMI to output products
+            if len(o) > 0:
+                reaction_results[current_rsmi] = o
+        # Update the result list for the next step
+        result = new_result
+        # Append the results of this step to the overall steps list
+        all_steps.append(result)
+    # Return the results: a list of all steps and a dictionary of reaction results
+    return all_steps, reaction_results
+def calculate_max_depth(reaction_tree, current_node=None, depth=0):
+    """
+    Calculate the maximum depth of a reaction tree.
+    Parameters:
+    - reaction_tree (dict): A dictionary where keys are reaction SMILES (RSMI)
+    and values are lists of product reactions.
+    - current_node (str): The current node in the tree being explored (reaction SMILES).
+    - depth (int): The current depth of the tree.
+    Returns:
+    - int: The maximum depth of the tree.
+    """
+    # If current_node is None, start from the root node (first key in the reaction tree)
+    if current_node is None:
+        current_node = list(reaction_tree.keys())[0]
+    # Get the products of the current node (reaction)
+    products = reaction_tree.get(current_node, [])
+    # If no products, we are at a leaf node, return the current depth
+    if not products:
+        return depth
+    # Recursively calculate the depth for each product and return the maximum
+    max_subtree_depth = max(
+        calculate_max_depth(reaction_tree, product, depth + 1) for product in products
+    )
+    return max_subtree_depth
+# def find_all_paths(
+#     reaction_tree,
+#     target_products,
+#     current_node,
+#     target_depth,
+#     current_depth=0,
+#     path=None,
+# ):
+#     """
+#     Recursively find all paths from the root to the maximum depth in the reaction tree.
+#     Parameters:
+#     - reaction_tree (dict): A dictionary of reaction SMILES with products.
+#     - current_node (str): The current node (reaction SMILES).
+#     - target_depth (int): The depth at which the product matches the root's product.
+#     - current_depth (int): The current depth of the search.
+#     - path (list): The current path in the tree.
+#     Returns:
+#     - List of all paths to the max depth.
+#     """
+#     if path is None:
+#         path = []
+#     # Add the current node (reaction SMILES) to the path
+#     path.append(current_node)
+#     # If we have reached the target depth, check the product
+#     if current_depth == target_depth:
+#         # Extract products of the current node
+#         products = sorted(current_node.split(">>")[1].split("."))
+#         return [path] if products == target_products else []
+#     # If we haven't reached the target depth, recurse on the products
+#     paths = []
+#     for product in reaction_tree.get(current_node, []):
+#         paths.extend(
+#             find_all_paths(
+#                 reaction_tree,
+#                 target_products,
+#                 product,
+#                 target_depth,
+#                 current_depth + 1,
+#                 path.copy(),
+#             )
+#         )
+#     return paths
+def find_all_paths(
+    reaction_tree,
+    target_products,
+    current_node,
+    target_depth,
+    current_depth=0,
+    path=None,
+):
+    """
+    Recursively find all paths from the root to the maximum depth in the reaction tree.
+    Parameters:
+    - reaction_tree (dict): A dictionary of reaction SMILES with products.
+    - current_node (str): The current node (reaction SMILES).
+    - target_depth (int): The depth at which the product matches the root's product.
+    - current_depth (int): The current depth of the search.
+    - path (list): The current path in the tree.
+    Returns:
+    - List of all paths to the max depth.
+    """
+    if path is None:
+        path = []
+    # Add the current node (reaction SMILES) to the path
+    path.append(current_node)
+    # If we have reached the target depth, check the product
+    if current_depth == target_depth:
+        # Extract products of the current node
+        current_products = sorted(
+            current_node.split(">>")[1].split("."), key=len
+        )  # Sort by length of SMILES
+        largest_current_product = current_products[-1] if current_products else None
+        # Process target_products to get the largest product
+        sorted_target_products = sorted(
+            target_products, key=len
+        )  # target_products should be a string here
+        largest_target_product = (
+            sorted_target_products[-1] if sorted_target_products else None
+        )
+        # Compare the largest elements
+        return [path] if largest_current_product == largest_target_product else []
+    # If we haven't reached the target depth, recurse on the products
+    paths = []
+    for product in reaction_tree.get(current_node, []):
+        paths.extend(
+            find_all_paths(
+                reaction_tree,
+                target_products,
+                product,
+                target_depth,
+                current_depth + 1,
+                path.copy(),
+            )
+        )
+    return paths

synkit/Reactor/multi_step_aam.py ADDED Viewed

@@ -0,0 +1,82 @@
+from typing import List, Optional, Dict
+from synkit.Reactor.reagent import (
+    remove_reagent_from_smiles,
+    add_catalysis,
+)
+from synkit.Reactor.multi_step import (
+    perform_multi_step_reaction,
+    find_all_paths,
+)
+from synkit.Reactor.inference import aam_infer
+def get_aam_reactions(
+    list_reactions: List[str],
+    rule: Dict[int, str],
+    order: List[int],
+    cat: Optional[str],
+) -> List[Optional[str]]:
+    """
+    Processes a list of reaction SMILES strings to infer Atom-Atom Mappings (AAM)
+    using specified rules and optionally adds a catalyst if no mappings are
+    initially found.
+    Parameters:
+    - list_reactions (List[str]): A list of reaction SMILES strings.
+    - rule (Dict[int, str]): A dictionary mapping indices to rules for AAM inference.
+    - order (List[int]): A list indicating the order in which rules should be applied
+    to reactions.
+    - cat (Optional[str]): The SMILES string of the catalyst; can be None or empty
+    if no catalyst is to be used.
+    Returns:
+    - List[Optional[str]]: A list containing the first inferred AAM for each reaction
+    or None if AAM could not be inferred.
+    """
+    aam = []
+    for key, entry in enumerate(list_reactions):
+        if not entry:
+            aam.append(None)  # Handling empty or invalid entries gracefully
+            continue
+        rsmi = remove_reagent_from_smiles(entry)
+        smart = aam_infer(rsmi, rule[order[key]])
+        if len(smart) == 0:
+            if cat and cat.strip():
+                rsmi = add_catalysis(rsmi, cat)
+                smart = aam_infer(rsmi, rule[order[key]])
+            else:
+                aam.append(None)
+                continue
+        aam.append(smart[0] if smart else None)
+    return aam
+def get_mechanism(gml: dict, order: List[int], rsmi: str, cat: str = None) -> List[str]:
+    """
+    Computes the mechanism of a chemical reaction given the reaction SMILES,
+    order of reactants, and GML graph.
+    Parameters:
+    - gml (dict): Graph representation of the molecule.
+    - order (List[int]): Order of reactants involved in the reaction.
+    - rsmi (str): Reaction SMILES string.
+    Returns:
+    - List[str]: List of Atom-Atom Mappings (AAM) for each step in the reaction.
+    """
+    try:
+        rsmi = add_catalysis(rsmi, cat)
+        results, reaction_tree = perform_multi_step_reaction(gml, order, rsmi)
+        target_products = sorted(rsmi.split(">>")[1].split("."))
+        max_depth = len(results)
+        all_paths = find_all_paths(reaction_tree, target_products, rsmi, max_depth)
+        real_path = all_paths[0][1:]  # remove the original
+        all_steps = get_aam_reactions(real_path, gml, order, cat)
+        return all_steps
+    except Exception as e:
+        print(e)
+        return []

synkit/Reactor/reagent.py ADDED Viewed

@@ -0,0 +1,95 @@
+from collections import Counter
+from synkit.Chem.Reaction.standardize import Standardize
+# NOTE(review): `std` is not referenced by either function defined in this module;
+# confirm whether other modules import it before removing.
+std = Standardize()
+def remove_reagent_from_smiles(rsmi: str) -> str:
+    """
+    Removes common molecules from the reactants and products in a SMILES reaction string.
+    This function identifies the molecules that appear on both sides of the reaction
+    (reactants and products) and removes one occurrence of each common molecule from
+    both sides.
+    Parameters:
+    - rsmi (str): A SMILES string representing a chemical reaction in the form:
+    'reactant1.reactant2...>>product1.product2...'
+    Returns:
+    - str: A new SMILES string with the common molecules removed, in the form:
+    'reactant1.reactant2...>>product1.product2...'
+    Example:
+    >>> remove_reagent_from_smiles('CC=O.CC=O.CCC=O>>CC=CO.CC=O.CC=O')
+    'CCC=O>>CC=CO'
+    """
+    # Split the input SMILES string into reactants and products
+    reactants, products = rsmi.split(">>")
+    # Split the reactants and products by '.' to separate molecules
+    reactant_molecules = reactants.split(".")
+    product_molecules = products.split(".")
+    # Count the occurrences of each molecule in reactants and products
+    reactant_count = Counter(reactant_molecules)
+    product_count = Counter(product_molecules)
+    # Find common molecules between reactants and products
+    common_molecules = set(reactant_count) & set(product_count)
+    # Remove common molecules by the minimum occurrences in both reactants and products
+    for molecule in common_molecules:
+        common_occurrences = min(reactant_count[molecule], product_count[molecule])
+        # Decrease the count by the common occurrences
+        reactant_count[molecule] -= common_occurrences
+        product_count[molecule] -= common_occurrences
+    # Rebuild the lists of reactant and product molecules after removal
+    filtered_reactant_molecules = [
+        molecule for molecule, count in reactant_count.items() for _ in range(count)
+    ]
+    filtered_product_molecules = [
+        molecule for molecule, count in product_count.items() for _ in range(count)
+    ]
+    # Join the remaining molecules back into SMILES strings
+    new_reactants = ".".join(filtered_reactant_molecules)
+    new_products = ".".join(filtered_product_molecules)
+    # Return the updated reaction string
+    return f"{new_reactants}>>{new_products}"
+def add_catalysis(reaction_smiles, catalyst_smiles):
+    """
+    Adds a catalyst to both the reactant and product sides of a reaction SMILES string.
+    If the catalyst is None or an empty string, the function returns the
+    original reaction SMILES string.
+    Parameters:
+    - reaction_smiles (str): The SMILES string of the reaction ("reactants>>products").
+    - catalyst_smiles (str): The SMILES string of the catalyst,
+    which can be None or empty.
+    Returns:
+    - str: Modified reaction SMILES string with the catalyst included on both sides,
+    or the original if no valid catalyst is provided.
+    """
+    # Check if the catalyst is None or empty
+    if catalyst_smiles is None or catalyst_smiles == "":
+        return reaction_smiles
+    # Split the reaction SMILES into reactants and products
+    reactants, products = reaction_smiles.split(">>")
+    # Add the catalyst to both reactants and products
+    reactants_with_cat = ".".join([reactants, catalyst_smiles])
+    products_with_cat = ".".join([products, catalyst_smiles])
+    # Combine the modified reactants and products back into a reaction SMILES
+    new_reaction_smiles = ">>".join([reactants_with_cat, products_with_cat])
+    return new_reaction_smiles

synkit/Reactor/rule_apply.py ADDED Viewed

@@ -0,0 +1,81 @@
+import os
+import torch
+from typing import List
+from synkit.IO.debug import setup_logging
+from mod import smiles, ruleGMLString, DG, config
+# Module-level logger; rule_apply uses it to record a failure before re-raising.
+logger = setup_logging()
+def deduplicateGraphs(initial):
+    res = []
+    for cand in initial:
+        for a in res:
+            if cand.isomorphism(a) != 0:
+                res.append(a)  # the one we had already
+                break
+        else:
+            # didn't find any isomorphic, use the new one
+            res.append(cand)
+    return res
+def rule_apply(
+    smiles_list: List[str], rule: str, verbose: int = 0, print_output: bool = False
+) -> DG:
+    """
+    Applies a reaction rule to a list of SMILES strings and optionally prints
+    the derivation graph.
+    This function first converts the SMILES strings into molecular graphs,
+    deduplicates them, sorts them based on the number of vertices, and
+    then applies the provided reaction rule in the GML string format.
+    The resulting derivation graph (DG) is returned.
+    Parameters:
+    - smiles_list (List[str]): A list of SMILES strings representing the molecules
+    to which the reaction rule will be applied.
+    - rule (str): The reaction rule in GML string format. This rule will be applied to the
+    molecules represented by the SMILES strings.
+    - verbose (int, optional): The verbosity level for logging or debugging.
+    Default is 0 (no verbosity).
+    - print_output (bool, optional): If True, the derivation graph will be printed
+    to the "out" directory. Default is False.
+    Returns:
+    - DG: The derivation graph (DG) after applying the reaction rule to the
+    initial molecules.
+    Raises:
+    - Exception: If an error occurs during the process of applying the rule,
+    an exception is raised.
+    """
+    try:
+        # Convert SMILES strings to molecular graphs and deduplicate
+        initial_molecules = [smiles(smile, add=False) for smile in smiles_list]
+        initial_molecules = deduplicateGraphs(initial_molecules)
+        # Sort molecules based on the number of vertices
+        initial_molecules = sorted(
+            initial_molecules, key=lambda molecule: molecule.numVertices, reverse=False
+        )
+        # Convert the reaction rule from GML string format to a reaction rule object
+        reaction_rule = ruleGMLString(rule)
+        # Create the derivation graph and apply the reaction rule
+        dg = DG(graphDatabase=initial_molecules)
+        config.dg.doRuleIsomorphismDuringBinding = False
+        dg.build().apply(initial_molecules, reaction_rule, verbosity=verbose)
+        # Optionally print the output to a directory
+        if print_output:
+            os.makedirs("out", exist_ok=True)
+            dg.print()
+        return dg
+    except Exception as e:
+        logger.error(f"An error occurred: {e}")
+        raise

synkit/Vis/__init__.py ADDED Viewed

File without changes