PyPI - synkit - Versions diffs - 0.0.1__py3-none-any.whl - Mend

synkit 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

synkit/Chem/Fingerprint/__init__.py +0 -0
synkit/Chem/Fingerprint/fp_calculator.py +122 -0
synkit/Chem/Fingerprint/smiles_featurizer.py +185 -0
synkit/Chem/Fingerprint/transformation_fp.py +79 -0
synkit/Chem/Molecule/__init__.py +0 -0
synkit/Chem/Molecule/standardize.py +137 -0
synkit/Chem/Reaction/__init__.py +0 -0
synkit/Chem/Reaction/balance_check.py +162 -0
synkit/Chem/Reaction/cleanning.py +59 -0
synkit/Chem/Reaction/deionize.py +289 -0
synkit/Chem/Reaction/neutralize.py +256 -0
synkit/Chem/Reaction/reagent.py +102 -0
synkit/Chem/Reaction/standardize.py +157 -0
synkit/Chem/Reaction/tautomerize.py +168 -0
synkit/Graph/Cluster/__init__.py +0 -0
synkit/Graph/Cluster/morphism.py +83 -0
synkit/Graph/Feature/__init__.py +0 -0
synkit/Graph/Feature/graph_descriptors.py +325 -0
synkit/Graph/Feature/graph_fps.py +97 -0
synkit/Graph/Feature/graph_signature.py +236 -0
synkit/Graph/Feature/hash_fps.py +130 -0
synkit/Graph/Feature/morgan_fps.py +87 -0
synkit/Graph/Feature/path_fps.py +82 -0
synkit/Graph/__init.py +0 -0
synkit/IO/__init__.py +0 -0
synkit/IO/chem_converter.py +231 -0
synkit/IO/data_io.py +277 -0
synkit/IO/data_process.py +49 -0
synkit/IO/debug.py +78 -0
synkit/IO/dg_to_gml.py +124 -0
synkit/IO/gml_to_nx.py +119 -0
synkit/IO/graph_to_mol.py +110 -0
synkit/IO/mol_to_graph.py +282 -0
synkit/IO/nx_to_gml.py +200 -0
synkit/IO/parse_rule.py +172 -0
synkit/IO/smiles_to_id.py +119 -0
synkit/ITS/_misc.py +280 -0
synkit/ITS/aam_validator.py +254 -0
synkit/ITS/its_builder.py +94 -0
synkit/ITS/its_construction.py +213 -0
synkit/ITS/normalize_aam.py +183 -0
synkit/ITS/partial_expand.py +170 -0
synkit/Reactor/__init__.py +0 -0
synkit/Reactor/core_engine.py +164 -0
synkit/Reactor/inference.py +73 -0
synkit/Reactor/multi_step.py +227 -0
synkit/Reactor/multi_step_aam.py +82 -0
synkit/Reactor/reagent.py +95 -0
synkit/Reactor/rule_apply.py +81 -0
synkit/Vis/__init__.py +0 -0
synkit/Vis/chemical_graph_visualizer.py +378 -0
synkit/Vis/chemical_reaction_visualizer.py +133 -0
synkit/Vis/chemical_space.py +83 -0
synkit/Vis/embedding.py +92 -0
synkit/Vis/graph_visualizer.py +286 -0
synkit/Vis/pdf_writer.py +143 -0
synkit/Vis/rsmi_to_fig.py +169 -0
synkit/__init__.py +0 -0
synkit/_misc.py +181 -0
synkit-0.0.1.dist-info/METADATA +148 -0
synkit-0.0.1.dist-info/RECORD +63 -0
synkit-0.0.1.dist-info/WHEEL +4 -0
synkit-0.0.1.dist-info/licenses/LICENSE +21 -0

synkit/ITS/aam_validator.py ADDED Viewed

@@ -0,0 +1,254 @@
+import pandas as pd
+import networkx as nx
+from operator import eq
+from itertools import combinations
+from joblib import Parallel, delayed
+from typing import Dict, List, Tuple, Union, Optional
+from networkx.algorithms.isomorphism import generic_node_match, generic_edge_match
+from synkit.ITS.its_construction import ITSConstruction
+from synkit.IO.chem_converter import rsmi_to_graph
+from synkit.ITS._misc import get_rc, enumerate_tautomers, mapping_success_rate
+class AAMValidator:
+    def __init__(self):
+        """Initializes the AAMValidator class."""
+        pass
+    @staticmethod
+    def check_equivariant_graph(
+        its_graphs: List[nx.Graph],
+    ) -> Tuple[List[Tuple[int, int]], int]:
+        """
+        Checks for isomorphism among a list of ITS graphs and
+        identifies all pairs of isomorphic graphs.
+        Parameters:
+        - its_graphs (List[nx.Graph]): A list of ITS graphs.
+        Returns:
+        - List[Tuple[int, int]]: A list of tuples representing
+                pairs of indices of isomorphic graphs.
+        - int: The count of unique isomorphic graph pairs found.
+        """
+        nodeLabelNames = ["typesGH"]
+        nodeLabelDefault = ["*", False, 0, 0, ()]
+        nodeLabelOperator = [eq, eq, eq, eq, eq]
+        nodeMatch = generic_node_match(
+            nodeLabelNames, nodeLabelDefault, nodeLabelOperator
+        )
+        edgeMatch = generic_edge_match("order", 1, eq)
+        classified = []
+        for i, j in combinations(range(len(its_graphs)), 2):
+            if nx.is_isomorphic(
+                its_graphs[i], its_graphs[j], node_match=nodeMatch, edge_match=edgeMatch
+            ):
+                classified.append((i, j))
+        return classified, len(classified)
+    @staticmethod
+    def smiles_check(
+        mapped_smile: str,
+        ground_truth: str,
+        check_method: str = "RC",  # or 'ITS'
+        ignore_aromaticity: bool = False,
+    ) -> bool:
+        """
+        Checks the equivalence of mapped SMILES against ground truth
+        using reaction center (RC) or ITS graph method.
+        Parameters:
+        - mapped_smile (str): The mapped SMILES string.
+        - ground_truth (str): The ground truth SMILES string.
+        - check_method (str): The method used for validation ('RC' or 'ITS').
+        - ignore_aromaticity (bool): Flag to ignore aromaticity in ITS graph construction.
+        Returns:
+        - bool: True if the mapped SMILES is equivalent to the ground truth,
+        False otherwise.
+        """
+        its_graphs = []
+        rc_graphs = []
+        try:
+            for rsmi in [mapped_smile, ground_truth]:
+                G, H = rsmi_to_graph(
+                    rsmi=rsmi, sanitize=True, drop_non_aam=True, light_weight=True
+                )
+                ITS = ITSConstruction.ITSGraph(G, H, ignore_aromaticity)
+                its_graphs.append(ITS)
+                rc = get_rc(ITS)
+                rc_graphs.append(rc)
+            _, equivariant = AAMValidator.check_equivariant_graph(
+                rc_graphs if check_method == "RC" else its_graphs
+            )
+            return equivariant == 1
+        except Exception as e:
+            print("An error occurred:", str(e))
+            return False
+    @staticmethod
+    def smiles_check_tautomer(
+        mapped_smile: str,
+        ground_truth: str,
+        check_method: str = "RC",  # or 'ITS'
+        ignore_aromaticity: bool = False,
+    ) -> Optional[bool]:
+        """
+        Determines if a given mapped SMILE string is equivalent to any tautomer of
+        a ground truth SMILES string using a specified comparison method.
+        Parameters:
+        - mapped_smile (str): The mapped SMILES string to check against the tautomers of
+        the ground truth.
+        - ground_truth (str): The reference SMILES string for generating possible
+        tautomers.
+        - check_method (str): The method used for checking equivalence. Default is 'RC'.
+        Possible values are 'RC' for reaction center or 'ITS'.
+        - ignore_aromaticity (bool): Flag to ignore differences in aromaticity between
+        the mapped SMILE and the tautomers.Default is False.
+        Returns:
+        - Optional[bool]: True if the mapped SMILE matches any of the enumerated tautomers
+        of the ground truth according to the specified check method.
+        Returns False if no match is found.
+        Returns None if an error occurs during processing.
+        Raises:
+        - Exception: If an error occurs during the tautomer enumeration
+        or the comparison process.
+        """
+        try:
+            ground_truth_tautomers = enumerate_tautomers(ground_truth)
+            return any(
+                AAMValidator.smiles_check(
+                    mapped_smile, t, check_method, ignore_aromaticity
+                )
+                for t in ground_truth_tautomers
+            )
+        except Exception as e:
+            print(f"An error occurred: {e}")
+            return None
+    @staticmethod
+    def check_pair(
+        mapping: Dict[str, str],
+        mapped_col: str,
+        ground_truth_col: str,
+        check_method: str = "RC",
+        ignore_aromaticity: bool = False,
+        ignore_tautomers: bool = True,
+    ) -> bool:
+        """
+        Checks the equivalence between the mapped and ground truth
+        values within a given mapping dictionary, using a specified check method.
+        The check can optionally ignore aromaticity.
+        Parameters:
+        - mapping (Dict[str, str]): A dictionary containing the data entries to check.
+        - mapped_col (str): The key in the mapping dictionary corresponding
+        to the mapped value.
+        - ground_truth_col (str): The key in the mapping dictionary corresponding
+        to the ground truth value.
+        - check_method (str, optional): The method used for checking the equivalence.
+        Defaults to 'RC'.
+        - ignore_aromaticity (bool, optional): Flag to indicate whether aromaticity
+        should be ignored during the check. Defaults to False.
+        - ignore_tautomers (bool, optional): Flag to indicate whether tautomers
+        should be ignored during the check. Defaults to False.
+        Returns:
+        - bool: The result of the check, indicating whether the mapped value is
+        equivalent to the ground truth according to the specified method
+        and considerations regarding aromaticity.
+        """
+        if ignore_tautomers:
+            return AAMValidator.smiles_check(
+                mapping[mapped_col],
+                mapping[ground_truth_col],
+                check_method,
+                ignore_aromaticity,
+            )
+        else:
+            return AAMValidator.smiles_check_tautomer(
+                mapping[mapped_col],
+                mapping[ground_truth_col],
+                check_method,
+                ignore_aromaticity,
+            )
+    @staticmethod
+    def validate_smiles(
+        data: Union[pd.DataFrame, List[Dict[str, str]]],
+        ground_truth_col: str = "ground_truth",
+        mapped_cols: List[str] = ["rxn_mapper", "graphormer", "local_mapper"],
+        check_method: str = "RC",
+        ignore_aromaticity: bool = False,
+        n_jobs: int = 1,
+        verbose: int = 0,
+        ignore_tautomers=True,
+    ) -> List[Dict[str, Union[str, float, List[bool]]]]:
+        """
+        Validates collections of mapped SMILES against their ground truths for
+        multiple mappers and calculates the accuracy.
+        Parameters:
+        - data (Union[pd.DataFrame, List[Dict[str, str]]]):
+        The input data containing mapped and ground truth SMILES.
+        - id_col (str): The name of the column or key containing the reaction ID.
+        - ground_truth_col (str): The name of the column or key containing
+        the ground truth SMILES.
+        - mapped_cols (List[str]): The list of columns or keys containing
+        the mapped SMILES for different mappers.
+        - check_method (str): The method used for validation ('RC' or 'ITS').
+        - ignore_aromaticity (bool): Flag to ignore aromaticity in ITS graph construction.
+        - n_jobs (int): The number of parallel jobs to run.
+        - verbose (int): The verbosity level for joblib's parallel execution.
+        Returns:
+        - List[Dict[str, Union[str, float, List[bool]]]]: A list of dictionaries, each
+        containing the mapper name, accuracy, and individual results for each SMILES pair.
+        """
+        validation_results = []
+        for mapped_col in mapped_cols:
+            if isinstance(data, pd.DataFrame):
+                mappings = data.to_dict("records")
+            elif isinstance(data, list):
+                mappings = data
+            else:
+                raise ValueError(
+                    "Data must be either a pandas DataFrame or a list of dictionaries."
+                )
+            results = Parallel(n_jobs=n_jobs, verbose=verbose)(
+                delayed(AAMValidator.check_pair)(
+                    mapping,
+                    mapped_col,
+                    ground_truth_col,
+                    check_method,
+                    ignore_aromaticity,
+                    ignore_tautomers,
+                )
+                for mapping in mappings
+            )
+            accuracy = sum(results) / len(mappings) if mappings else 0
+            mapped_data = [value[mapped_col] for value in mappings]
+            validation_results.append(
+                {
+                    "mapper": mapped_col,
+                    "accuracy": round(100 * accuracy, 2),
+                    "results": results,
+                    "success_rate": mapping_success_rate(mapped_data),
+                }
+            )
+        return validation_results

synkit/ITS/its_builder.py ADDED Viewed

@@ -0,0 +1,94 @@
+import networkx as nx
+from copy import deepcopy
+class ITSBuilder:
+    @staticmethod
+    def update_atom_map(graph: nx.Graph) -> None:
+        """
+        Update the 'atom_map' of each node in a graph to match its node index.
+        Parameters:
+        - graph (nx.Graph): The graph whose node attributes are to be updated.
+        """
+        for node in graph.nodes():
+            graph.nodes[node]["atom_map"] = node
+    @staticmethod
+    def ITSGraph(G: nx.Graph, RC: nx.Graph) -> nx.Graph:
+        """
+        Creates an ITS graph based on graph G and the reaction center RC.
+        This function:
+        - Copies graph G to initialize ITS.
+        - Initializes 'typesGH' and edge orders for ITS.
+        - Establishes a mapping from RC's 'atom_map' to G's node indices.
+        - Updates nodes and edges in ITS based on attributes from RC using the established mapping.
+        Parameters:
+        - G (nx.Graph): The initial graph.
+        - RC (nx.Graph): The reaction center graph with modifications.
+        Returns:
+        - nx.Graph: The ITS graph with updated node and edge attributes based on RC.
+        """
+        # Step 1: Copy Graph G to form the initial ITS
+        ITS = deepcopy(G)
+        # Step 2: Initialize 'typesGH' for each node in ITS using attributes from G
+        for node in ITS.nodes():
+            node_attr = ITS.nodes[node]
+            typesGH = (
+                (
+                    node_attr.get("element", "*"),
+                    node_attr.get("aromatic", False),
+                    node_attr.get("hcount", 0),
+                    node_attr.get("charge", 0),
+                    node_attr.get("neighbors", []),
+                ),
+                (
+                    node_attr.get("element", "*"),
+                    node_attr.get("aromatic", False),
+                    node_attr.get("hcount", 0),
+                    node_attr.get("charge", 0),
+                    node_attr.get("neighbors", []),
+                ),
+            )
+            ITS.nodes[node]["typesGH"] = typesGH
+        # Step 3: Set edge orders in ITS as (order, order) and 'standard_order' as 0
+        for u, v in ITS.edges():
+            edge_attr = ITS[u][v]
+            order = edge_attr.get("order", 1.0)
+            ITS[u][v]["order"] = (order, order)
+            ITS[u][v]["standard_order"] = 0.0
+        # Mapping from atom_map in RC to node indices in G
+        atom_map_to_node = {
+            G.nodes[n]["atom_map"]: n for n in G.nodes if G.nodes[n]["atom_map"] != 0
+        }
+        # print(atom_map_to_node)
+        # Step 4: Update nodes in ITS based on RC
+        for rc_node, rc_attr in RC.nodes(data=True):
+            atom_map = rc_attr.get("atom_map")
+            if atom_map in atom_map_to_node:
+                target_node = atom_map_to_node[atom_map]
+                ITS.nodes[target_node].update(rc_attr)
+        # Step 5: Update and add edges based on RC
+        for rc_u, rc_v, rc_edge_attr in RC.edges(data=True):
+            rc_u_map = RC.nodes[rc_u].get("atom_map", rc_u)
+            rc_v_map = RC.nodes[rc_v].get("atom_map", rc_v)
+            rc_u_target = atom_map_to_node.get(rc_u_map)
+            rc_v_target = atom_map_to_node.get(rc_v_map)
+            if rc_u_target is not None and rc_v_target is not None:
+                if ITS.has_edge(rc_u_target, rc_v_target):
+                    ITS[rc_u_target][rc_v_target].update(rc_edge_attr)
+                else:
+                    ITS.add_edge(rc_u_target, rc_v_target, **rc_edge_attr)
+        # Update atom_map for all nodes to reflect their indices
+        ITSBuilder.update_atom_map(ITS)
+        return ITS

synkit/ITS/its_construction.py ADDED Viewed

@@ -0,0 +1,213 @@
+import networkx as nx
+from typing import Tuple, Dict, Any
+from copy import deepcopy
+class ITSConstruction:
+    @staticmethod
+    def ITSGraph(
+        G: nx.Graph,
+        H: nx.Graph,
+        ignore_aromaticity: bool = False,
+        attributes_defaults: Dict[str, Any] = None,
+        balance_its: bool = True,
+    ) -> nx.Graph:
+        """
+        Creates a Combined Graph Representation (CGR) from two input graphs G and H.
+        This function merges the nodes of G and H, preserving their attributes. Edges are
+        added based on their presence in G and/or H, with special labeling for edges
+        unique to one graph.
+        Parameters:
+        - G (nx.Graph): The first input graph.
+        - H (nx.Graph): The second input graph.
+        - ignore_aromaticity (bool): Whether to ignore aromaticity in the graphs.
+        Defaults to False.
+        - attributes_defaults (Dict[str, Any]): A dictionary of default attributes
+        to use for nodes that are not present in either G or H.
+        Returns:
+        - nx.Graph: The Combined Graph Representation as a new graph instance.
+        """
+        # Create a null graph from a copy of G to preserve attributes
+        if (balance_its and len(G.nodes()) <= len(H.nodes())) or (
+            not balance_its and len(G.nodes()) >= len(H.nodes())
+        ):
+            ITS = deepcopy(G)
+        else:
+            ITS = deepcopy(H)
+        ITS.remove_edges_from(list(ITS.edges()))
+        # Initialize a dictionary to hold node types
+        typesDict = dict()
+        # Add typeG and typeH attributes, or default attributes for "*" unknown elements
+        for v in list(ITS.nodes()):
+            # Check if v is in both G and H
+            if v not in G.nodes() or v not in H.nodes():
+                continue
+            else:
+                typesG = ITSConstruction.get_node_attributes_with_defaults(
+                    G, v, attributes_defaults
+                )  # node attribute in reactant graph
+                typesH = ITSConstruction.get_node_attributes_with_defaults(
+                    H, v, attributes_defaults
+                )  # node attribute in product graph
+                typesDict[v] = (typesG, typesH)
+        nx.set_node_attributes(ITS, typesDict, "typesGH")
+        # Add edges from G and H
+        ITS = ITSConstruction.add_edges_to_ITS(ITS, G, H, ignore_aromaticity)
+        return ITS
+    @staticmethod
+    def get_node_attribute(graph: nx.Graph, node: int, attribute: str, default):
+        """
+        Retrieves a specific attribute for a node in a graph, returning a default value if
+        the attribute is missing.
+        Parameters:
+        - graph (nx.Graph): The graph from which to retrieve the node attribute.
+        - node (int): The node identifier.
+        - attribute (str): The attribute to retrieve.
+        - default: The default value to return if the attribute is missing.
+        Returns:
+        - The value of the node attribute, or the default value if the attribute is
+        missing.
+        """
+        try:
+            return graph.nodes[node][attribute]
+        except KeyError:
+            return default
+    @staticmethod
+    def get_node_attributes_with_defaults(
+        graph: nx.Graph, node: int, attributes_defaults: Dict[str, Any] = None
+    ) -> Tuple:
+        """
+        Retrieves node attributes from a graph, assigning default values if they are
+        missing. Allows for an optional dictionary of attribute-default value pairs to
+        specify custom attributes and defaults.
+        Parameters:
+        - graph (nx.Graph): The graph from which to retrieve node attributes.
+        - node (int): The node identifier.
+        - attributes_defaults (Dict[str, Any], optional): A dictionary specifying
+        attributes and their default values.
+        Returns:
+        - Tuple: A tuple containing the node attributes in the order specified by
+        attributes_defaults.
+        """
+        if attributes_defaults is None:
+            attributes_defaults = {
+                "element": "*",
+                "aromatic": False,
+                "hcount": 0,
+                "charge": 0,
+                "neighbors": ["", ""],
+            }
+        return tuple(
+            ITSConstruction.get_node_attribute(graph, node, attr, default)
+            for attr, default in attributes_defaults.items()
+        )
+    @staticmethod
+    def add_edges_to_ITS(
+        ITS: nx.Graph, G: nx.Graph, H: nx.Graph, ignore_aromaticity: bool = False
+    ) -> nx.Graph:
+        """
+        Adds edges to the Combined Graph Representation (ITS) based on the edges of G and
+        H, and returns a new graph without modifying the original ITS.
+        Parameters:
+        - ITS (nx.Graph): The initial combined graph representation.
+        - G (nx.Graph): The first input graph.
+        - H (nx.Graph): The second input graph.
+        - ignore_aromaticity (bool): Whether to ignore aromaticity in the graphs. Defaults
+        to False.
+        Returns:
+        - nx.Graph: The updated graph with added edges.
+        """
+        new_ITS = ITS.copy()
+        # Add edges from G and H
+        for graph_from, graph_to, reverse in [(G, H, False), (H, G, True)]:
+            for u, v in graph_from.edges():
+                if not new_ITS.has_edge(u, v):
+                    if graph_to.has_edge(u, v) or graph_to.has_edge(v, u):
+                        edge_label = (
+                            (graph_from[u][v]["order"], graph_to[u][v]["order"])
+                            if graph_to.has_edge(u, v)
+                            else (
+                                (graph_from[v][u]["order"], graph_to[v][u]["order"])
+                                if reverse
+                                else (
+                                    graph_from[u][v]["order"],
+                                    graph_to[v][u]["order"],
+                                )
+                            )
+                        )
+                        new_ITS.add_edge(u, v, order=edge_label)
+                    else:
+                        edge_label = (
+                            (graph_from[u][v]["order"], 0)
+                            if not reverse
+                            else (0, graph_from[u][v]["order"])
+                        )
+                        new_ITS.add_edge(u, v, order=edge_label)
+        nodes_to_remove = [node for node in new_ITS.nodes() if not new_ITS.nodes[node]]
+        new_ITS.remove_nodes_from(nodes_to_remove)
+        new_ITS = ITSConstruction.add_standard_order_attribute(
+            new_ITS, ignore_aromaticity
+        )
+        return new_ITS
+    @staticmethod
+    def add_standard_order_attribute(
+        graph: nx.Graph, ignore_aromaticity: bool = False
+    ) -> nx.Graph:
+        """
+        Adds a 'standard_order' attribute to each edge in the provided NetworkX graph.
+        This attribute is calculated based on the existing 'order' attribute, which should
+        be a tuple associated with each edge. The 'standard_order' is computed by
+        subtracting the second element of the 'order' tuple from the first element.
+        If any element of the 'order' tuple is not an integer (e.g., '*'), it is treated
+        as 0 for the purpose of this computation.
+        Parameters:
+        - graph (NetworkX.Graph): A NetworkX graph where each edge has an 'order'
+        attribute formatted as a tuple.
+        Returns:
+        - NetworkX.Graph: The same graph passed as input, now with a 'standard_order'
+        attribute added to each edge, reflecting the computed standard order derived from
+        the 'order' attribute.
+        """
+        new_graph = graph.copy()
+        for u, v, data in new_graph.edges(data=True):
+            if "order" in data and isinstance(data["order"], tuple):
+                # Extract order values, replacing non-ints with 0
+                first_order = data["order"][0]
+                second_order = data["order"][1]
+                # Compute standard order
+                standard_order = first_order - second_order
+                if ignore_aromaticity:
+                    if abs(standard_order) < 1:  # to ignore aromaticity
+                        standard_order = 0
+                # Update the edge data with a new attribute 'standard_order'
+                new_graph[u][v]["standard_order"] = standard_order
+            else:
+                # If 'order' attribute is missing or not a tuple, 'standard_order' to 0
+                new_graph[u][v]["standard_order"] = 0
+        return new_graph