PyPI - synkit - Versions diffs - 0.0.1__py3-none-any.whl - Mend

synkit 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

synkit/Chem/Fingerprint/__init__.py +0 -0
synkit/Chem/Fingerprint/fp_calculator.py +122 -0
synkit/Chem/Fingerprint/smiles_featurizer.py +185 -0
synkit/Chem/Fingerprint/transformation_fp.py +79 -0
synkit/Chem/Molecule/__init__.py +0 -0
synkit/Chem/Molecule/standardize.py +137 -0
synkit/Chem/Reaction/__init__.py +0 -0
synkit/Chem/Reaction/balance_check.py +162 -0
synkit/Chem/Reaction/cleanning.py +59 -0
synkit/Chem/Reaction/deionize.py +289 -0
synkit/Chem/Reaction/neutralize.py +256 -0
synkit/Chem/Reaction/reagent.py +102 -0
synkit/Chem/Reaction/standardize.py +157 -0
synkit/Chem/Reaction/tautomerize.py +168 -0
synkit/Graph/Cluster/__init__.py +0 -0
synkit/Graph/Cluster/morphism.py +83 -0
synkit/Graph/Feature/__init__.py +0 -0
synkit/Graph/Feature/graph_descriptors.py +325 -0
synkit/Graph/Feature/graph_fps.py +97 -0
synkit/Graph/Feature/graph_signature.py +236 -0
synkit/Graph/Feature/hash_fps.py +130 -0
synkit/Graph/Feature/morgan_fps.py +87 -0
synkit/Graph/Feature/path_fps.py +82 -0
synkit/Graph/__init.py +0 -0
synkit/IO/__init__.py +0 -0
synkit/IO/chem_converter.py +231 -0
synkit/IO/data_io.py +277 -0
synkit/IO/data_process.py +49 -0
synkit/IO/debug.py +78 -0
synkit/IO/dg_to_gml.py +124 -0
synkit/IO/gml_to_nx.py +119 -0
synkit/IO/graph_to_mol.py +110 -0
synkit/IO/mol_to_graph.py +282 -0
synkit/IO/nx_to_gml.py +200 -0
synkit/IO/parse_rule.py +172 -0
synkit/IO/smiles_to_id.py +119 -0
synkit/ITS/_misc.py +280 -0
synkit/ITS/aam_validator.py +254 -0
synkit/ITS/its_builder.py +94 -0
synkit/ITS/its_construction.py +213 -0
synkit/ITS/normalize_aam.py +183 -0
synkit/ITS/partial_expand.py +170 -0
synkit/Reactor/__init__.py +0 -0
synkit/Reactor/core_engine.py +164 -0
synkit/Reactor/inference.py +73 -0
synkit/Reactor/multi_step.py +227 -0
synkit/Reactor/multi_step_aam.py +82 -0
synkit/Reactor/reagent.py +95 -0
synkit/Reactor/rule_apply.py +81 -0
synkit/Vis/__init__.py +0 -0
synkit/Vis/chemical_graph_visualizer.py +378 -0
synkit/Vis/chemical_reaction_visualizer.py +133 -0
synkit/Vis/chemical_space.py +83 -0
synkit/Vis/embedding.py +92 -0
synkit/Vis/graph_visualizer.py +286 -0
synkit/Vis/pdf_writer.py +143 -0
synkit/Vis/rsmi_to_fig.py +169 -0
synkit/__init__.py +0 -0
synkit/_misc.py +181 -0
synkit-0.0.1.dist-info/METADATA +148 -0
synkit-0.0.1.dist-info/RECORD +63 -0
synkit-0.0.1.dist-info/WHEEL +4 -0
synkit-0.0.1.dist-info/licenses/LICENSE +21 -0

synkit/Graph/Feature/hash_fps.py ADDED Viewed

@@ -0,0 +1,130 @@
+import networkx as nx
+import hashlib
+from typing import Optional, Any
+class HashFPs:
+    """
+    Binary hash fingerprint of a graph derived from its cycle and path lengths.
+    The lengths of all simple cycles (and, optionally, of the simple paths between
+    two nodes) are concatenated, hashed with a hashlib algorithm and rendered as a
+    string of '0'/'1' characters that is exactly `numBits` long.
+    """
+    def __init__(
+        self, graph: "nx.Graph", numBits: int = 256, hash_alg: str = "sha256"
+    ) -> None:
+        """
+        Store the graph and configuration, then validate the configuration.
+        Parameters:
+        - graph (nx.Graph): The graph to be fingerprinted.
+        - numBits (int): Number of bits in the output binary hash. Default is 256 bits.
+        - hash_alg (str): Name of a fixed-length hashlib algorithm, such as 'sha256'
+        or 'sha512'.
+        Raises:
+        - ValueError: If `numBits` is non-positive or if `hash_alg` is not supported.
+        """
+        self.graph = graph
+        self.numBits = numBits
+        self.hash_alg = hash_alg
+        self.validate_parameters()
+    def validate_parameters(self) -> None:
+        """
+        Validate `numBits` and `hash_alg`.
+        Raises:
+        - ValueError: If `numBits` is not positive, or if `hash_alg` does not name a
+        hashlib constructor whose `hexdigest()` can be called without a length.
+        """
+        if self.numBits <= 0:
+            raise ValueError("Number of bits must be positive")
+        # A bare hasattr(hashlib, name) check also accepts attributes that are not
+        # digest constructors (e.g. "new", "algorithms_available"), and the shake_*
+        # algorithms need an explicit length for hexdigest(); reject both up front
+        # instead of failing later inside the hashing code.
+        supported = (
+            isinstance(self.hash_alg, str)
+            and self.hash_alg in hashlib.algorithms_guaranteed
+            and not self.hash_alg.startswith("shake_")
+            and hasattr(hashlib, self.hash_alg)
+        )
+        if not supported:
+            raise ValueError(f"Unsupported hash algorithm: {self.hash_alg}")
+    def hash_fps(
+        self,
+        start_node: Optional[int] = None,
+        end_node: Optional[int] = None,
+        max_path_length: Optional[int] = None,
+    ) -> str:
+        """
+        Generate a binary hash fingerprint of the graph based on its paths and cycles.
+        Parameters:
+        - start_node (Optional[int]): The starting node index for path detection.
+        - end_node (Optional[int]): The ending node index for path detection.
+        - max_path_length (Optional[int]): The maximum length for paths to be considered.
+        Returns:
+        - str: A binary string of exactly `numBits` characters.
+        """
+        hash_object = self.initialize_hash()
+        features = self.extract_features(start_node, end_node, max_path_length)
+        return self.finalize_hash(hash_object, features)
+    def initialize_hash(self) -> Any:
+        """Create and return a fresh hash object for the configured algorithm."""
+        return getattr(hashlib, self.hash_alg)()
+    def extract_features(
+        self,
+        start_node: Optional[int],
+        end_node: Optional[int],
+        max_path_length: Optional[int],
+    ) -> str:
+        """
+        Extract features from the graph based on paths and cycles.
+        Parameters:
+        - start_node (Optional[int]): The starting node for path detection.
+        - end_node (Optional[int]): The ending node for path detection.
+        - max_path_length (Optional[int]): Cutoff for path length during detection.
+        Returns:
+        - str: The length of every simple cycle followed by the number of nodes of
+        every simple path, written as decimal numbers and concatenated.
+        """
+        features = [len(cycle) for cycle in nx.simple_cycles(self.graph)]
+        # Paths are only enumerated when both end points are given.
+        if start_node is not None and end_node is not None:
+            features.extend(
+                len(path)
+                for path in nx.all_simple_paths(
+                    self.graph,
+                    source=start_node,
+                    target=end_node,
+                    cutoff=max_path_length,
+                )
+            )
+        # NOTE: the numbers are joined without a separator, so different length
+        # lists (e.g. [1, 12] and [11, 2]) can yield the same feature string.
+        return "".join(map(str, features))
+    def finalize_hash(self, hash_object: Any, features: str) -> str:
+        """
+        Feed the features into the hash object and return the digest as a binary
+        string of exactly `numBits` characters.
+        Parameters:
+        - hash_object (Any): The hash object.
+        - features (str): Concatenated string of graph features.
+        Returns:
+        - str: The digest bits, truncated or extended to `numBits`.
+        """
+        hash_object.update(features.encode())
+        # bin() drops leading zero bits; pad back to the full digest width so the
+        # bit positions are stable and the length does not depend on the digest.
+        full_hash_binary = bin(int(hash_object.hexdigest(), 16))[2:].zfill(
+            hash_object.digest_size * 8
+        )
+        missing_bits = self.numBits - len(full_hash_binary)
+        if missing_bits > 0:
+            full_hash_binary += self.iterative_deepening(hash_object, missing_bits)
+        return full_hash_binary[: self.numBits]
+    def iterative_deepening(self, hash_object: Any, remaining_bits: int) -> str:
+        """
+        Extend hash length using iterative hashing until the desired bit length is
+        achieved.
+        Parameters:
+        - hash_object (hashlib._Hash): The hash object for iterative deepening; it is
+        updated in place.
+        - remaining_bits (int): Number of bits needed to reach `numBits`.
+        Returns:
+        - str: Exactly `remaining_bits` additional binary characters (an empty string
+        when `remaining_bits` is not positive).
+        """
+        if remaining_bits <= 0:
+            return ""
+        additional_data = ""
+        while (
+            len(additional_data) * 4 < remaining_bits
+        ):  # Each hex digit represents 4 bits
+            hash_object.update(additional_data.encode())
+            additional_data += hash_object.hexdigest()
+        # Keep leading zero bits, otherwise fewer than `remaining_bits` characters
+        # could be returned.
+        padded = bin(int(additional_data, 16))[2:].zfill(len(additional_data) * 4)
+        return padded[:remaining_bits]

synkit/Graph/Feature/morgan_fps.py ADDED Viewed

@@ -0,0 +1,87 @@
+import networkx as nx
+import hashlib
+from typing import Any
+class MorganFPs:
+    """
+    Morgan-style binary fingerprint built by hashing each node's neighbourhood
+    (the nodes reachable within `radius` steps together with their distances).
+    """
+    def __init__(
+        self,
+        graph: "nx.Graph",
+        radius: int = 3,
+        nBits: int = 1024,
+        hash_alg: str = "sha256",
+    ) -> None:
+        """
+        Initialize the MorganFPs class to generate fingerprints based on the Morgan
+        algorithm, approximating Extended Connectivity Fingerprints (ECFPs).
+        Parameters:
+        - graph (nx.Graph): The graph to analyze.
+        - radius (int): The radius to consider for node neighborhood analysis.
+        - nBits (int): Total number of bits in the final fingerprint output.
+        - hash_alg (str): Name of the hashlib algorithm used for hashing node
+        neighborhoods.
+        Raises:
+        - ValueError: If `nBits` is not positive.
+        - AttributeError: If hashlib has no attribute named `hash_alg`.
+        """
+        if nBits <= 0:
+            raise ValueError("Number of bits must be positive")
+        self.graph = graph
+        self.radius = radius
+        self.nBits = nBits
+        self.hash_alg = hash_alg
+        self.hash_function = getattr(hashlib, self.hash_alg)
+    def generate_fingerprint(self) -> str:
+        """
+        Generate a binary string fingerprint of the graph based on the local environments
+        of nodes. Node hashes are concatenated in node iteration order and cut at
+        `nBits`; if the nodes do not supply enough bits the rest is produced by
+        `iterative_deepening`.
+        Returns:
+        - str: A binary string of length `nBits` representing the fingerprint of the
+        graph.
+        """
+        fingerprint = ""
+        # Fallback seed for the padding step: without it an empty graph would leave
+        # `hash_obj` unbound. It is replaced by the last node's hash whenever the
+        # graph has at least one node.
+        hash_obj = self.hash_function()
+        for node in self.graph.nodes():
+            neighborhood = nx.single_source_shortest_path_length(
+                self.graph, node, cutoff=self.radius
+            )
+            neighborhood_str = "-".join(
+                f"{nbr}-{dist}"
+                for nbr, dist in sorted(neighborhood.items())
+                if nbr != node
+            )
+            hash_obj = self.hash_function(neighborhood_str.encode())
+            # zfill restores the leading zero bits that bin() strips.
+            node_hash = bin(int(hash_obj.hexdigest(), 16))[2:].zfill(
+                hash_obj.digest_size * 8
+            )
+            needed_bits = self.nBits - len(fingerprint)
+            if len(node_hash) > needed_bits:
+                node_hash = node_hash[:needed_bits]
+            fingerprint += node_hash
+            if len(fingerprint) == self.nBits:
+                return fingerprint
+        if len(fingerprint) < self.nBits:
+            fingerprint += self.iterative_deepening(
+                hash_obj, self.nBits - len(fingerprint)
+            )
+        return fingerprint
+    def iterative_deepening(self, hash_object: Any, remaining_bits: int) -> str:
+        """
+        Extend the hash length using iterative hashing until the desired bit length is
+        achieved.
+        Parameters:
+        - hash_object (hashlib._Hash): The hash object used for iterative deepening;
+        it is updated in place.
+        - remaining_bits (int): Number of bits needed to complete the fingerprint to
+        `nBits`.
+        Returns:
+        - str: Exactly `remaining_bits` additional binary characters (an empty string
+        when `remaining_bits` is not positive).
+        """
+        if remaining_bits <= 0:
+            return ""
+        additional_data = ""
+        while len(additional_data) * 4 < remaining_bits:
+            hash_object.update(additional_data.encode())
+            additional_data += hash_object.hexdigest()
+        # Keep leading zero bits, otherwise fewer than `remaining_bits` characters
+        # could be returned and the fingerprint would be too short.
+        padded = bin(int(additional_data, 16))[2:].zfill(len(additional_data) * 4)
+        return padded[:remaining_bits]

synkit/Graph/Feature/path_fps.py ADDED Viewed

@@ -0,0 +1,82 @@
+import networkx as nx
+import hashlib
+from typing import Any
+class PathFPs:
+    """
+    Path-based binary fingerprint built by hashing the simple paths between every
+    ordered pair of distinct nodes.
+    """
+    def __init__(
+        self,
+        graph: "nx.Graph",
+        max_length: int = 10,
+        nBits: int = 1024,
+        hash_alg: str = "sha256",
+    ) -> None:
+        """
+        Initialize the PathFPs class to create a binary fingerprint based on paths in a
+        graph.
+        Parameters:
+        - graph (nx.Graph): Graph on which to perform analysis.
+        - max_length (int): Limit on path lengths considered in the fingerprint.
+        - nBits (int): Size of the binary fingerprint in bits.
+        - hash_alg (str): Name of the hashlib algorithm used for path hashing; the
+        matching constructor is stored as `self.hash_function`.
+        Raises:
+        - ValueError: If `nBits` is not positive.
+        - AttributeError: If hashlib has no attribute named `hash_alg`.
+        """
+        if nBits <= 0:
+            raise ValueError("Number of bits must be positive")
+        self.graph = graph
+        self.max_length = max_length
+        self.nBits = nBits
+        self.hash_alg = hash_alg
+        self.hash_function = getattr(hashlib, self.hash_alg)
+    def generate_fingerprint(self) -> str:
+        """
+        Generate a binary string fingerprint of the graph by hashing paths up to a certain
+        length and combining them. Path hashes are concatenated in enumeration order
+        and cut at `nBits`; if the paths do not supply enough bits the rest is
+        produced by `iterative_deepening`.
+        Returns:
+        - str: A binary string of length `nBits` that represents the fingerprint of the
+        graph.
+        """
+        fingerprint = ""
+        # Fallback seed for the padding step: without it a graph with no path at all
+        # (empty or single-node graph) would leave `hash_obj` unbound. It is replaced
+        # by the last path's hash whenever at least one path exists.
+        hash_obj = self.hash_function()
+        for node in self.graph.nodes():
+            for target in self.graph.nodes():
+                if node == target:
+                    continue
+                for path in nx.all_simple_paths(
+                    self.graph, source=node, target=target, cutoff=self.max_length
+                ):
+                    path_str = "-".join(map(str, path))
+                    hash_obj = self.hash_function(path_str.encode())
+                    # zfill restores the leading zero bits that bin() strips.
+                    path_hash = bin(int(hash_obj.hexdigest(), 16))[2:].zfill(
+                        hash_obj.digest_size * 8
+                    )
+                    needed_bits = self.nBits - len(fingerprint)
+                    if len(path_hash) > needed_bits:
+                        path_hash = path_hash[:needed_bits]
+                    fingerprint += path_hash
+                    if len(fingerprint) == self.nBits:
+                        return fingerprint
+        if len(fingerprint) < self.nBits:
+            fingerprint += self.iterative_deepening(
+                hash_obj, self.nBits - len(fingerprint)
+            )
+        return fingerprint
+    def iterative_deepening(self, hash_object: Any, remaining_bits: int) -> str:
+        """
+        Extend the hash length using iterative hashing until the desired bit length is
+        achieved.
+        Parameters:
+        - hash_object (hashlib._Hash): The hash object used for iterative deepening;
+        it is updated in place.
+        - remaining_bits (int): Number of bits needed to complete the fingerprint
+        to `nBits`.
+        Returns:
+        - str: Exactly `remaining_bits` additional binary characters (an empty string
+        when `remaining_bits` is not positive).
+        """
+        if remaining_bits <= 0:
+            return ""
+        additional_data = ""
+        while len(additional_data) * 4 < remaining_bits:
+            hash_object.update(additional_data.encode())
+            additional_data += hash_object.hexdigest()
+        # Keep leading zero bits, otherwise fewer than `remaining_bits` characters
+        # could be returned and the fingerprint would be too short.
+        padded = bin(int(additional_data, 16))[2:].zfill(len(additional_data) * 4)
+        return padded[:remaining_bits]

synkit/Graph/__init.py ADDED Viewed

File without changes

synkit/IO/__init__.py ADDED Viewed

File without changes

synkit/IO/chem_converter.py ADDED Viewed

@@ -0,0 +1,231 @@
+import networkx as nx
+from rdkit import Chem
+from typing import Optional, Tuple
+from synkit.IO.debug import setup_logging
+from synkit.IO.mol_to_graph import MolToGraph
+from synkit.IO.graph_to_mol import GraphToMol
+from synkit.ITS.its_construction import ITSConstruction
+from synkit.IO.nx_to_gml import NXToGML
+from synkit.IO.gml_to_nx import GMLToNX
+from synkit.ITS._misc import get_rc, its_decompose
+from synkit._misc import remove_explicit_hydrogen
+logger = setup_logging()
+def smiles_to_graph(
+    smiles: str,
+    drop_non_aam: bool,
+    light_weight: bool,
+    sanitize: bool,
+    use_index_as_atom_map: bool,
+) -> Optional[nx.Graph]:
+    """
+    Parse a SMILES string with RDKit and convert the molecule to a graph through
+    `MolToGraph.mol_to_graph`.
+    Parameters:
+    - smiles (str): SMILES representation of the molecule.
+    - drop_non_aam (bool): Whether to drop nodes without atom mapping.
+    - light_weight (bool): Whether to create a light-weight graph.
+    - sanitize (bool): Whether to run `Chem.SanitizeMol` on the parsed molecule.
+    - use_index_as_atom_map (bool): Whether to use the index of atoms as atom map numbers
+    Returns:
+    - nx.Graph or None: The graph produced by the converter, or None when parsing,
+    sanitization or conversion fails (the failure is logged, never raised).
+    """
+    try:
+        # Parsing is always done unsanitized; sanitization is a separate, optional step.
+        molecule = Chem.MolFromSmiles(smiles, sanitize=False)
+        if molecule is None:
+            logger.warning(f"Failed to parse SMILES: {smiles}")
+            return None
+        if sanitize:
+            try:
+                Chem.SanitizeMol(molecule)
+            except Exception as sanitize_error:
+                logger.error(
+                    f"Sanitization failed for SMILES {smiles}: {sanitize_error}"
+                )
+                return None
+        result = MolToGraph().mol_to_graph(
+            molecule, drop_non_aam, light_weight, use_index_as_atom_map
+        )
+        if result is None:
+            logger.warning(f"Failed to convert molecule to graph for SMILES: {smiles}")
+        return result
+    except Exception as exc:
+        # Last-resort guard: anything unexpected is logged and reported as None.
+        logger.error(
+            f"Unhandled exception in converting SMILES to graph: {smiles}, "
+            f"Error: {str(exc)}"
+        )
+        return None
+def rsmi_to_graph(
+    rsmi: str,
+    drop_non_aam: bool = True,
+    light_weight: bool = True,
+    sanitize: bool = True,
+    use_index_as_atom_map: bool = True,
+) -> Tuple[Optional[nx.Graph], Optional[nx.Graph]]:
+    """
+    Split a reaction SMILES (RSMI) on ">>" and convert each side to a graph with
+    `smiles_to_graph`.
+    Parameters:
+    - rsmi (str): Reaction SMILES string in "reactants>>products" format.
+    - drop_non_aam (bool, optional): If True, nodes without atom mapping numbers
+    will be dropped.
+    - light_weight (bool, optional): If True, creates a light-weight graph.
+    - sanitize (bool, optional): If True, sanitizes molecules during conversion.
+    - use_index_as_atom_map (bool, optional): Whether to use the index of atoms as
+    atom map numbers.
+    Returns:
+    - Tuple[Optional[nx.Graph], Optional[nx.Graph]]: The reactant graph and the
+    product graph. Either entry is None when that side could not be converted;
+    both are None when `rsmi` does not split into exactly two parts.
+    """
+    try:
+        # Unpacking raises ValueError unless there is exactly one ">>" separator.
+        lhs, rhs = rsmi.split(">>")
+        graphs = tuple(
+            smiles_to_graph(
+                side, drop_non_aam, light_weight, sanitize, use_index_as_atom_map
+            )
+            for side in (lhs, rhs)
+        )
+    except ValueError:
+        logger.error(f"Invalid RSMI format: {rsmi}")
+        return (None, None)
+    return graphs
+def graph_to_rsmi(
+    r: nx.Graph,
+    p: nx.Graph,
+    its: nx.Graph,
+    sanitize: bool = True,
+    explicit_hydrogen: bool = False,
+    ignore_hcount_inference: bool = False,
+) -> str:
+    """
+    Build a "reactants>>products" reaction SMILES string from two graphs.
+    Parameters:
+    - r (nx.Graph): Graph of the reactants.
+    - p (nx.Graph): Graph of the products.
+    - its (nx.Graph): ITS graph; only read when hydrogens are implicit and
+    `ignore_hcount_inference` is False.
+    - sanitize (bool): Forwarded to `GraphToMol.graph_to_mol`.
+    - explicit_hydrogen (bool): If True, both graphs are converted as they are. If
+    False, they are converted with `use_h_count=True`.
+    - ignore_hcount_inference (bool): If False (and `explicit_hydrogen` is False),
+    `remove_explicit_hydrogen` is applied to both graphs with the nodes of
+    `get_rc(its)` before conversion.
+    Returns:
+    - str: The reaction SMILES, or the text "Error in generating SMILES: ..." when
+    RDKit fails to write either side.
+    """
+    converter = GraphToMol()
+    if explicit_hydrogen:
+        r_mol = converter.graph_to_mol(r, sanitize=sanitize)
+        p_mol = converter.graph_to_mol(p, sanitize=sanitize)
+    else:
+        if not ignore_hcount_inference:
+            # Pre-process both sides using the reaction-centre nodes of the ITS.
+            rc = get_rc(its)
+            r = remove_explicit_hydrogen(r, rc.nodes())
+            p = remove_explicit_hydrogen(p, rc.nodes())
+        r_mol = converter.graph_to_mol(r, sanitize=sanitize, use_h_count=True)
+        p_mol = converter.graph_to_mol(p, sanitize=sanitize, use_h_count=True)
+    try:
+        left = Chem.MolToSmiles(r_mol)
+        right = Chem.MolToSmiles(p_mol)
+    except Exception as exc:
+        # Failures are reported through the return value rather than raised.
+        return "Error in generating SMILES: " + str(exc)
+    return f"{left}>>{right}"
+def smart_to_gml(
+    smart: str,
+    core: bool = True,
+    sanitize: bool = False,
+    rule_name: str = "rule",
+    reindex: bool = True,
+    explicit_hydrogen: bool = False,
+) -> str:
+    """
+    Convert a reaction string into a GML rule via its ITS graph.
+    Parameters:
+    - smart (str): The reaction string; it is parsed with `rsmi_to_graph`, so it must
+    use the "reactants>>products" layout.
+    - core (bool): If True, the ITS graph is replaced by `get_rc(its)` and the
+    reactant/product graphs by `its_decompose` of that result. Defaults to True.
+    - sanitize (bool): Forwarded to `rsmi_to_graph`. Defaults to False.
+    - rule_name (str): The name of the reaction rule. Defaults to "rule".
+    - reindex (bool): Whether to reindex the graph nodes. Defaults to True.
+    - explicit_hydrogen (bool): Forwarded to `NXToGML.transform`.
+    Returns:
+    - str: The GML representation of the reaction.
+    """
+    # All other rsmi_to_graph options are left at their defaults.
+    reactant_graph, product_graph = rsmi_to_graph(smart, sanitize=sanitize)
+    its_graph = ITSConstruction.ITSGraph(reactant_graph, product_graph)
+    if core:
+        its_graph = get_rc(its_graph)
+        reactant_graph, product_graph = its_decompose(its_graph)
+    return NXToGML().transform(
+        (reactant_graph, product_graph, its_graph),
+        reindex=reindex,
+        rule_name=rule_name,
+        explicit_hydrogen=explicit_hydrogen,
+    )
+def gml_to_smart(
+    gml: str,
+    sanitize: bool = True,
+    explicit_hydrogen: bool = False,
+    ignore_hcount_inference: bool = False,
+) -> Tuple[str, nx.Graph]:
+    """
+    Converts a GML rule string back to a reaction string and also returns the third
+    graph parsed from the GML.
+    Parameters:
+    - gml (str): The GML string to convert.
+    - sanitize (bool): Specifies whether the molecule should be sanitized upon conversion.
+    - explicit_hydrogen (bool): Controls whether hydrogens are explicitly represented
+    in the output.
+    - ignore_hcount_inference (bool): Forwarded to `graph_to_rsmi`; when False and
+    hydrogens are implicit, that function pre-processes the graphs using the
+    third parsed graph.
+    Returns:
+    - Tuple[str, nx.Graph]: A `(reaction_string, rc)` pair, where `reaction_string` is
+    the value returned by `graph_to_rsmi` and `rc` is the third item returned by
+    `GMLToNX(gml).transform()` (presumably the rule/reaction-centre graph; confirm
+    against `GMLToNX`).
+    """
+    r, p, rc = GMLToNX(gml).transform()
+    reaction = graph_to_rsmi(
+        r, p, rc, sanitize, explicit_hydrogen, ignore_hcount_inference
+    )
+    # A tuple, not a bare string, is returned; the annotation now reflects that.
+    return reaction, rc