PyPI - synkit - Versions diffs - 0.0.1__py3-none-any.whl - Mend

synkit 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

synkit/Chem/Fingerprint/__init__.py +0 -0
synkit/Chem/Fingerprint/fp_calculator.py +122 -0
synkit/Chem/Fingerprint/smiles_featurizer.py +185 -0
synkit/Chem/Fingerprint/transformation_fp.py +79 -0
synkit/Chem/Molecule/__init__.py +0 -0
synkit/Chem/Molecule/standardize.py +137 -0
synkit/Chem/Reaction/__init__.py +0 -0
synkit/Chem/Reaction/balance_check.py +162 -0
synkit/Chem/Reaction/cleanning.py +59 -0
synkit/Chem/Reaction/deionize.py +289 -0
synkit/Chem/Reaction/neutralize.py +256 -0
synkit/Chem/Reaction/reagent.py +102 -0
synkit/Chem/Reaction/standardize.py +157 -0
synkit/Chem/Reaction/tautomerize.py +168 -0
synkit/Graph/Cluster/__init__.py +0 -0
synkit/Graph/Cluster/morphism.py +83 -0
synkit/Graph/Feature/__init__.py +0 -0
synkit/Graph/Feature/graph_descriptors.py +325 -0
synkit/Graph/Feature/graph_fps.py +97 -0
synkit/Graph/Feature/graph_signature.py +236 -0
synkit/Graph/Feature/hash_fps.py +130 -0
synkit/Graph/Feature/morgan_fps.py +87 -0
synkit/Graph/Feature/path_fps.py +82 -0
synkit/Graph/__init.py +0 -0
synkit/IO/__init__.py +0 -0
synkit/IO/chem_converter.py +231 -0
synkit/IO/data_io.py +277 -0
synkit/IO/data_process.py +49 -0
synkit/IO/debug.py +78 -0
synkit/IO/dg_to_gml.py +124 -0
synkit/IO/gml_to_nx.py +119 -0
synkit/IO/graph_to_mol.py +110 -0
synkit/IO/mol_to_graph.py +282 -0
synkit/IO/nx_to_gml.py +200 -0
synkit/IO/parse_rule.py +172 -0
synkit/IO/smiles_to_id.py +119 -0
synkit/ITS/_misc.py +280 -0
synkit/ITS/aam_validator.py +254 -0
synkit/ITS/its_builder.py +94 -0
synkit/ITS/its_construction.py +213 -0
synkit/ITS/normalize_aam.py +183 -0
synkit/ITS/partial_expand.py +170 -0
synkit/Reactor/__init__.py +0 -0
synkit/Reactor/core_engine.py +164 -0
synkit/Reactor/inference.py +73 -0
synkit/Reactor/multi_step.py +227 -0
synkit/Reactor/multi_step_aam.py +82 -0
synkit/Reactor/reagent.py +95 -0
synkit/Reactor/rule_apply.py +81 -0
synkit/Vis/__init__.py +0 -0
synkit/Vis/chemical_graph_visualizer.py +378 -0
synkit/Vis/chemical_reaction_visualizer.py +133 -0
synkit/Vis/chemical_space.py +83 -0
synkit/Vis/embedding.py +92 -0
synkit/Vis/graph_visualizer.py +286 -0
synkit/Vis/pdf_writer.py +143 -0
synkit/Vis/rsmi_to_fig.py +169 -0
synkit/__init__.py +0 -0
synkit/_misc.py +181 -0
synkit-0.0.1.dist-info/METADATA +148 -0
synkit-0.0.1.dist-info/RECORD +63 -0
synkit-0.0.1.dist-info/WHEEL +4 -0
synkit-0.0.1.dist-info/licenses/LICENSE +21 -0

synkit/IO/graph_to_mol.py ADDED Viewed

@@ -0,0 +1,110 @@
+import networkx as nx
+from rdkit import Chem
+from typing import Dict
class GraphToMol:
    """
    Converts a NetworkX graph representation of a molecule into an RDKit molecule object,
    considering specific node and edge attributes for the construction of the molecule.
    This includes handling different bond orders and optional hydrogen counts on nodes.
    """

    def __init__(
        self,
        node_attributes: Dict[str, str] = {
            "element": "element",
            "charge": "charge",
            "atom_map": "atom_map",
        },
        edge_attributes: Dict[str, str] = {"order": "order"},
    ):
        """
        Initializes the GraphToMol object with mappings for node and edge attributes.

        The mappings are copied and merged onto the built-in defaults, so a partial
        mapping is accepted and neither the caller's dictionaries nor the shared
        default values can be modified through the instance.

        Parameters:
        - node_attributes (Dict[str, str]): Mapping of attribute names ("element",
          "charge", "atom_map" and, optionally, "hcount") to node keys in the graph.
        - edge_attributes (Dict[str, str]): Mapping of attribute names ("order") to
          edge keys in the graph.
        """
        self.node_attributes = {
            "element": "element",
            "charge": "charge",
            "atom_map": "atom_map",
            **node_attributes,
        }
        self.edge_attributes = {"order": "order", **edge_attributes}

    def graph_to_mol(
        self,
        graph: nx.Graph,
        ignore_bond_order: bool = False,
        sanitize: bool = True,
        use_h_count: bool = False,
    ) -> Chem.Mol:
        """
        Converts a NetworkX graph into an RDKit molecule.

        Parameters:
        - graph (nx.Graph): The molecule graph.
        - ignore_bond_order (bool): If True, all bonds are treated as single.
        - sanitize (bool): If True, runs Chem.SanitizeMol on the result; any error
          it raises is propagated to the caller.
        - use_h_count (bool): If True, nodes carrying a hydrogen-count attribute
          (key "hcount" unless remapped in node_attributes) get that many explicit
          hydrogens and no implicit ones.

        Returns:
        - Chem.Mol: An RDKit molecule object constructed from the graph.
        """
        element_key = self.node_attributes["element"]
        charge_key = self.node_attributes["charge"]
        atom_map_key = self.node_attributes["atom_map"]
        # "hcount" was never part of the mapping; fall back to the literal key.
        hcount_key = self.node_attributes.get("hcount", "hcount")
        order_key = self.edge_attributes["order"]

        mol = Chem.RWMol()
        node_to_idx: Dict[int, int] = {}
        for node, data in graph.nodes(data=True):
            atom = Chem.Atom(data.get(element_key, "C"))
            atom.SetFormalCharge(data.get(charge_key, 0))

            # Look the atom map up under the *configured* key; a missing
            # attribute leaves the atom unmapped.
            atom_map = data.get(atom_map_key)
            if atom_map is not None:
                atom.SetAtomMapNum(atom_map)

            hcount = data.get(hcount_key) if use_h_count else None
            if hcount is not None:
                atom.SetNoImplicit(True)
                atom.SetNumExplicitHs(hcount)

            node_to_idx[node] = mol.AddAtom(atom)

        for u, v, data in graph.edges(data=True):
            # abs() lets a negatively signed order be used as a plain bond order.
            bond_order = 1 if ignore_bond_order else abs(data.get(order_key, 1))
            mol.AddBond(
                node_to_idx[u],
                node_to_idx[v],
                self.get_bond_type_from_order(bond_order),
            )

        if sanitize:
            Chem.SanitizeMol(mol)
        return mol

    @staticmethod
    def get_bond_type_from_order(order: float) -> Chem.BondType:
        """
        Converts a numerical bond order into the corresponding RDKit BondType.

        Parameters:
        - order (float): The bond order.

        Returns:
        - Chem.BondType: SINGLE, DOUBLE or TRIPLE for 1, 2 or 3; every other value
          (1.5 included) is mapped to AROMATIC.
        """
        if order == 1:
            return Chem.BondType.SINGLE
        if order == 2:
            return Chem.BondType.DOUBLE
        if order == 3:
            return Chem.BondType.TRIPLE
        # Fallback: anything that is not an integral 1/2/3 is treated as aromatic.
        return Chem.BondType.AROMATIC

synkit/IO/mol_to_graph.py ADDED Viewed

@@ -0,0 +1,282 @@
+from rdkit import Chem
+from rdkit.Chem import AllChem
+import networkx as nx
+from typing import Any, Dict, Optional
+import random
+from synkit.IO.debug import setup_logging
+logger = setup_logging()
class MolToGraph:
    """
    A class for converting RDKit molecules to graph representations using
    RDKit and NetworkX. It supports creating both lightweight and detailed
    graph representations with customizable atom and bond attributes,
    allowing for exclusion of atoms without atom mapping numbers.
    """

    def __init__(self) -> None:
        """
        Initialize the MolToGraph class. No state is stored; every conversion
        routine is a static or class method.
        """
        pass

    @staticmethod
    def add_partial_charges(mol: Chem.Mol) -> None:
        """
        Computes and assigns Gasteiger partial charges to each atom in the given molecule.
        Failures are logged and swallowed so that graph construction can continue.

        Parameters:
        - mol (Chem.Mol): An RDKit molecule object.
        """
        try:
            AllChem.ComputeGasteigerCharges(mol)
        except Exception as e:
            # Deliberately best-effort: atoms without a charge are reported as 0.0
            # by _gather_atom_properties.
            logger.error(f"Error computing Gasteiger charges: {e}")

    @staticmethod
    def get_stereochemistry(atom: Chem.Atom) -> str:
        """
        Translates the chiral tag of an atom into a one-letter label.

        NOTE(review): the label is derived from the raw RDKit chiral tag
        (CCW -> 'S', CW -> 'R'); the tag depends on neighbor ordering, so it may
        not coincide with the CIP R/S descriptor - confirm before relying on it.

        Parameters:
        - atom (Chem.Atom): An RDKit atom object.

        Returns:
        - str: 'S', 'R', or 'N' when the atom carries neither tetrahedral tag.
        """
        chiral_tag = atom.GetChiralTag()
        if chiral_tag == Chem.ChiralType.CHI_TETRAHEDRAL_CCW:
            return "S"
        if chiral_tag == Chem.ChiralType.CHI_TETRAHEDRAL_CW:
            return "R"
        return "N"

    @staticmethod
    def get_bond_stereochemistry(bond: Chem.Bond) -> str:
        """
        Determines the stereochemistry (E/Z configuration) of a given bond.

        Parameters:
        - bond (Chem.Bond): An RDKit bond object.

        Returns:
        - str: The stereochemistry ('E', 'Z', or 'N' for non-stereospecific
        or non-double bonds).
        """
        if bond.GetBondType() != Chem.BondType.DOUBLE:
            return "N"
        stereo = bond.GetStereo()
        if stereo == Chem.BondStereo.STEREOE:
            return "E"
        if stereo == Chem.BondStereo.STEREOZ:
            return "Z"
        return "N"

    @staticmethod
    def has_atom_mapping(mol: Chem.Mol) -> bool:
        """
        Check if the given molecule has any atom mapping numbers.

        Parameters:
        - mol (Chem.Mol): An RDKit molecule object.

        Returns:
        - bool: True if any atom in the molecule has a mapping number, False otherwise.
        """
        return any(atom.HasProp("molAtomMapNumber") for atom in mol.GetAtoms())

    @staticmethod
    def random_atom_mapping(mol: Chem.Mol) -> Chem.Mol:
        """
        Assigns a random atom mapping number to each atom in the given molecule.
        The numbers are a random permutation of 1..N and are written in place.

        Parameters:
        - mol (Chem.Mol): An RDKit molecule object.

        Returns:
        - Chem.Mol: The same RDKit molecule object, now carrying the mapping numbers.
        """
        atom_indices = list(range(1, mol.GetNumAtoms() + 1))
        random.shuffle(atom_indices)
        for atom, idx in zip(mol.GetAtoms(), atom_indices):
            atom.SetProp("molAtomMapNumber", str(idx))
        return mol

    @classmethod
    def mol_to_graph(
        cls,
        mol: Chem.Mol,
        drop_non_aam: Optional[bool] = False,
        light_weight: Optional[bool] = False,
        use_index_as_atom_map: Optional[bool] = False,
    ) -> nx.Graph:
        """
        Converts an RDKit molecule object to a NetworkX graph with specified atom and bond
        attributes. Optionally excludes atoms without atom mapping numbers
        if drop_non_aam is True.

        Parameters:
        - mol (Chem.Mol): An RDKit molecule object.
        - drop_non_aam (bool, optional): If True, nodes without atom mapping numbers will
          be dropped. This option is useful for focusing on labeled parts of a molecule.
        - light_weight (bool, optional): If True, creates a graph with minimal attributes.
          This option is useful for reducing memory footprint or simplifying the graph.
        - use_index_as_atom_map (bool, optional): If True, a node is identified by the
          atom's map number when that number is non-zero and by (atom index + 1)
          otherwise. If False, every node is identified by (atom index + 1).

        Raises:
        - ValueError: If `drop_non_aam` is True while `use_index_as_atom_map` is False.

        Returns:
        - nx.Graph: A NetworkX graph representing the molecule.
        """
        if drop_non_aam and not use_index_as_atom_map:
            raise ValueError("drop_non_aam=True requires use_index_as_atom_map=True.")
        if light_weight:
            return cls._create_light_weight_graph(
                mol, drop_non_aam, use_index_as_atom_map
            )
        return cls._create_detailed_graph(mol, drop_non_aam, use_index_as_atom_map)

    @staticmethod
    def _node_id(atom: Chem.Atom, use_index_as_atom_map: bool) -> int:
        """Return the graph node identifier for an atom (map number or index + 1)."""
        map_num = atom.GetAtomMapNum()
        if use_index_as_atom_map and map_num != 0:
            return map_num
        return atom.GetIdx() + 1

    @classmethod
    def _create_light_weight_graph(
        cls,
        mol: Chem.Mol,
        drop_non_aam: bool = False,
        use_index_as_atom_map: bool = False,
    ) -> nx.Graph:
        """Build a graph carrying only a minimal set of node attributes and the
        bond order on each edge."""
        graph = nx.Graph()
        for atom in mol.GetAtoms():
            if drop_non_aam and atom.GetAtomMapNum() == 0:
                continue  # unmapped atoms are left out entirely
            atom_id = cls._node_id(atom, use_index_as_atom_map)
            graph.add_node(
                atom_id,
                element=atom.GetSymbol(),
                aromatic=atom.GetIsAromatic(),
                hcount=atom.GetTotalNumHs(),
                charge=atom.GetFormalCharge(),
                neighbors=sorted(
                    neighbor.GetSymbol() for neighbor in atom.GetNeighbors()
                ),
                atom_map=atom.GetAtomMapNum(),
            )
            # Edges are added while visiting each atom (so every bond is seen from
            # both ends); this keeps the node insertion order of the graph stable.
            for bond in atom.GetBonds():
                neighbor = bond.GetOtherAtom(atom)
                if drop_non_aam and neighbor.GetAtomMapNum() == 0:
                    continue
                graph.add_edge(
                    atom_id,
                    cls._node_id(neighbor, use_index_as_atom_map),
                    order=bond.GetBondTypeAsDouble(),
                )
        return graph

    @classmethod
    def _create_detailed_graph(
        cls,
        mol: Chem.Mol,
        drop_non_aam: bool = True,
        use_index_as_atom_map: bool = True,
    ) -> nx.Graph:
        """Build a graph whose nodes and edges carry the full property sets
        produced by _gather_atom_properties and _gather_bond_properties."""
        # Gasteiger charges are read back per atom in _gather_atom_properties.
        cls.add_partial_charges(mol)
        graph = nx.Graph()
        index_to_id = {}
        for atom in mol.GetAtoms():
            if drop_non_aam and atom.GetAtomMapNum() == 0:
                continue  # unmapped atoms are left out entirely
            atom_id = cls._node_id(atom, use_index_as_atom_map)
            index_to_id[atom.GetIdx()] = atom_id
            graph.add_node(atom_id, **cls._gather_atom_properties(atom))
        for bond in mol.GetBonds():
            begin_atom_id = index_to_id.get(bond.GetBeginAtomIdx())
            end_atom_id = index_to_id.get(bond.GetEndAtomIdx())
            # A missing entry means the atom was dropped above; skip its bonds.
            if begin_atom_id is None or end_atom_id is None:
                continue
            graph.add_edge(
                begin_atom_id, end_atom_id, **cls._gather_bond_properties(bond)
            )
        return graph

    @staticmethod
    def _gather_atom_properties(atom: Chem.Atom) -> Dict[str, Any]:
        """Collect all relevant properties from an atom to use
        as graph node attributes."""
        gasteiger_charge = (
            round(float(atom.GetProp("_GasteigerCharge")), 3)
            if atom.HasProp("_GasteigerCharge")
            else 0.0
        )
        return {
            "charge": atom.GetFormalCharge(),
            "hcount": atom.GetTotalNumHs(),
            "aromatic": atom.GetIsAromatic(),
            "element": atom.GetSymbol(),
            "atom_map": atom.GetAtomMapNum(),
            "isomer": MolToGraph.get_stereochemistry(atom),
            "partial_charge": gasteiger_charge,
            "hybridization": str(atom.GetHybridization()),
            "in_ring": atom.IsInRing(),
            "implicit_hcount": atom.GetNumImplicitHs(),
            "neighbors": sorted(
                neighbor.GetSymbol() for neighbor in atom.GetNeighbors()
            ),
        }

    @staticmethod
    def _gather_bond_properties(bond: Chem.Bond) -> Dict[str, Any]:
        """Collect all relevant properties from a bond to use as graph edge attributes."""
        return {
            "order": bond.GetBondTypeAsDouble(),
            "ez_isomer": MolToGraph.get_bond_stereochemistry(bond),
            "bond_type": str(bond.GetBondType()),
            "conjugated": bond.GetIsConjugated(),
            "in_ring": bond.IsInRing(),
        }

synkit/IO/nx_to_gml.py ADDED Viewed

@@ -0,0 +1,200 @@
+import networkx as nx
+from typing import Tuple, Dict, List
+from synkit.ITS._misc import expand_hydrogens
class NXToGML:
    """
    Serialises a reaction rule, given as left (L), right (R) and context (K)
    NetworkX graphs, into a GML rule string.
    """

    def __init__(self) -> None:
        pass

    @staticmethod
    def _charge_to_string(charge):
        """
        Converts an integer charge into a string representation.

        Parameters:
        - charge (int): The charge value, which can be positive, negative, or zero.

        Returns:
        - str: '+' / '-' for a unit charge, '2+', '3-', ... for larger magnitudes
          and an empty string for a neutral atom.
        """
        if charge > 0:
            return "+" if charge == 1 else f"{charge}+"
        if charge < 0:
            return "-" if charge == -1 else f"{-charge}-"
        return ""

    @staticmethod
    def _find_changed_nodes(
        graph1: nx.Graph, graph2: nx.Graph, attributes: list = ["charge"]
    ) -> list:
        """
        Identifies nodes with changes in specified attributes between two NetworkX graphs.
        Nodes that occur in only one of the graphs are ignored.

        Parameters:
        - graph1 (nx.Graph): The first NetworkX graph.
        - graph2 (nx.Graph): The second NetworkX graph.
        - attributes (list): A list of attribute names to check for changes. The
          default list is only read, never modified.

        Returns:
        - list: Node identifiers (in graph1 order) that differ in at least one of
          the specified attributes.
        """
        return [
            node
            for node in graph1.nodes()
            if node in graph2
            and any(
                graph1.nodes[node].get(attr, None) != graph2.nodes[node].get(attr, None)
                for attr in attributes
            )
        ]

    @staticmethod
    def _node_line(node_id, data) -> str:
        """Format one GML node entry labelled with element symbol and charge."""
        element = data.get("element", "X")
        charge_str = NXToGML._charge_to_string(data.get("charge", 0))
        return f'      node [ id {node_id} label "{element}{charge_str}" ]\n'

    @staticmethod
    def _edge_line(source, target, label) -> str:
        """Format one GML edge entry."""
        return f'      edge [ source {source} target {target} label "{label}" ]\n'

    @staticmethod
    def _convert_graph_to_gml(
        graph: nx.Graph,
        section: str,
        changed_node_ids: List,
        explicit_hydrogen: bool = False,
    ) -> str:
        """
        Convert a NetworkX graph to a GML string representation, focusing on nodes for the
        'context' section and on nodes and edges for the 'left' or 'right' sections.

        Parameters:
        - graph (nx.Graph): The NetworkX graph to be converted.
        - section (str): The section name in the GML output, typically "left", "right", or
        "context".
        - changed_node_ids (List): Identifiers of the nodes whose attributes change.
          They are listed in the non-context sections and omitted from "context".
        - explicit_hydrogen (bool): Only used for the "context" section: if True,
          edges whose "standard_order" is 0 are written as well.

        Returns:
        str: The GML string representation of the graph for the specified section.
        """
        order_to_label = {1: "-", 1.5: ":", 2: "=", 3: "#"}
        changed = frozenset(changed_node_ids)
        parts = [f"   {section} [\n"]

        if section == "context":
            parts.extend(
                NXToGML._node_line(node_id, data)
                for node_id, data in graph.nodes(data=True)
                if node_id not in changed
            )
            if explicit_hydrogen:
                for source, target, data in graph.edges(data=True):
                    if data.get("standard_order", 0) != 0:
                        continue
                    # An order that is not a key of order_to_label (e.g. the tuple
                    # default) falls back to the single-bond label.
                    label = order_to_label.get(data.get("order", (1.0, 1.0)), "-")
                    parts.append(NXToGML._edge_line(source, target, label))
        else:
            for source, target, data in graph.edges(data=True):
                label = order_to_label.get(data.get("order", 1), "-")
                parts.append(NXToGML._edge_line(source, target, label))
            parts.extend(
                NXToGML._node_line(node_id, data)
                for node_id, data in graph.nodes(data=True)
                if node_id in changed
            )

        parts.append("   ]\n")
        return "".join(parts)

    @staticmethod
    def _rule_grammar(
        L: nx.Graph,
        R: nx.Graph,
        K: nx.Graph,
        rule_name: str,
        changed_node_ids: List,
        explicit_hydrogen: bool,
    ) -> str:
        """
        Generate a GML string representation for a chemical rule, including its left,
        context, and right graphs.

        Parameters:
        - L (nx.Graph): The left graph.
        - R (nx.Graph): The right graph.
        - K (nx.Graph): The context graph.
        - rule_name (str): The name of the rule.
        - changed_node_ids (List): Identifiers of the nodes whose attributes differ
          between L and R.
        - explicit_hydrogen (bool): Forwarded to the context section only.

        Returns:
        - str: The GML string representation of the rule.
        """
        return "".join(
            (
                "rule [\n",
                f'   ruleID "{rule_name}"\n',
                NXToGML._convert_graph_to_gml(L, "left", changed_node_ids),
                NXToGML._convert_graph_to_gml(
                    K, "context", changed_node_ids, explicit_hydrogen
                ),
                NXToGML._convert_graph_to_gml(R, "right", changed_node_ids),
                "]",
            )
        )

    @staticmethod
    def transform(
        graph_rules: Tuple[nx.Graph, nx.Graph, nx.Graph],
        rule_name: str = "Test",
        reindex: bool = False,
        attributes: List[str] = ["charge"],
        explicit_hydrogen: bool = False,
    ) -> str:
        """
        Generate the GML string for a single rule given as an (L, R, K) graph tuple,
        with an option to reindex nodes.

        Parameters:
        - graph_rules (Tuple[nx.Graph, nx.Graph, nx.Graph]): The (L, R, K) graphs of
          one rule.
        - rule_name (str): Name written into the "ruleID" field.
        - reindex (bool): If true, relabel node IDs 1..n following the node order of
          the L graph. NOTE(review): nodes of R or K that are absent from L keep
          their original identifiers - confirm these cannot clash with the new ones.
        - attributes (List[str]): Node attributes compared between L and R to decide
          which nodes count as changed. The default list is only read.
        - explicit_hydrogen (bool): If True, K is first passed through
          expand_hydrogens and its unchanged edges are written to the context.

        Returns:
        - str: The GML string representation of the rule.
        """
        L, R, K = graph_rules
        if explicit_hydrogen:
            K = expand_hydrogens(K)
        if reindex:
            index_mapping = {
                old_id: new_id for new_id, old_id in enumerate(L.nodes(), 1)
            }
            L = nx.relabel_nodes(L, index_mapping)
            R = nx.relabel_nodes(R, index_mapping)
            K = nx.relabel_nodes(K, index_mapping)
        changed_node_ids = NXToGML._find_changed_nodes(L, R, attributes)
        return NXToGML._rule_grammar(
            L, R, K, rule_name, changed_node_ids, explicit_hydrogen
        )