synkit 0.0.5__tar.gz → 0.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {synkit-0.0.5 → synkit-0.0.6}/PKG-INFO +1 -1
- {synkit-0.0.5 → synkit-0.0.6}/pyproject.toml +1 -1
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Molecule/standardize.py +15 -9
- synkit-0.0.6/synkit/Rule/compose_rule.py +210 -0
- synkit-0.0.6/synkit/Rule/rule_mapping.py +314 -0
- synkit-0.0.6/synkit/Rule/seq_comp.py +72 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Rule/strip_rule.py +1 -1
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Vis/graph_visualizer.py +2 -1
- {synkit-0.0.5 → synkit-0.0.6}/.github/workflows/build-doc.yml +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/.github/workflows/publish-package.yml +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/.github/workflows/test-and-lint.yml +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/.gitignore +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/CHANGELOG.md +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Data/Benchmark/conversion_time.json.gz +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Data/Figure/synkit.png +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Data/Testcase/Compose/ComposeRule/data.txt +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Data/Testcase/Compose/SingleRule/R0/0.gml +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Data/Testcase/Compose/SingleRule/R0/1.gml +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Data/Testcase/Compose/SingleRule/R0/2.gml +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Data/Testcase/mech.json.gz +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Data/Testcase/para_rule.json.gz +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Data/Testcase/para_rule_retro.json.gz +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Data/smart.json.gz +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/LICENSE +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Makefile +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/README.md +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Fingerprint/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Fingerprint/test_fp_calculator.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Fingerprint/test_smiles_featurizer.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Fingerprint/test_transformation_fp.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Molecule/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Molecule/test_standardize.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Reaction/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Reaction/test_balance_checker.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Reaction/test_cleanning.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Reaction/test_deionize.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Reaction/test_fix_aam.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Reaction/test_neutralize.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Reaction/test_rsmi_utils.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Reaction/test_standardize.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/Reaction/test_tautomerize.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Chem/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Cluster/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Cluster/test_batch_cluster.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Cluster/test_graph_cluster.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Cluster/test_graph_morphism.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Cluster/test_rule_morphism.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Context/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Context/test_hier_context.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Context/test_radius_expand.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Feature/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Feature/test_graph_descriptors.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Feature/test_graph_fps.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Feature/test_graph_signature.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Feature/test_hash_fps.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Feature/test_morgan_fps.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Feature/test_path_fps.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Hydrogen/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Hydrogen/test_graph_hydrogen.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/Hydrogen/test_hcomplete.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/ITS/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/ITS/test_aam_validator.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/ITS/test_its_construction.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/ITS/test_its_expand.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/ITS/test_normalize_aam.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Graph/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/IO/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/IO/test_chemical_converter.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/IO/test_dg_to_gml.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/IO/test_gml_to_nx.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/IO/test_graph_to_mol.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/IO/test_mol_to_graph.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/IO/test_nx_to_gml.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Reactor/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Reactor/test_core_engine.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Reactor/test_crn.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Reactor/test_multi_steps.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Reactor/test_path_finder.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Reactor/test_reactor_engine.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Reactor/test_reactor_utils.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Rule/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Rule/test_molecule_rule.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Rule/test_reactor_rule.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Rule/test_retro_reactor.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Rule/test_rule_compose.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Rule/test_rule_rbl.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Rule/test_rule_utils.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Rule/test_valance_constrain.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Vis/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Vis/test_dpo_vis.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/Vis/test_embedding.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/Test/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/doc/conf.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/doc/getting_started.rst +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/doc/index.rst +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/doc/references.rst +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/lint.sh +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/make.bat +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/pytest.sh +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/requirements.txt +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Fingerprint/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Fingerprint/fp_calculator.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Fingerprint/smiles_featurizer.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Fingerprint/transformation_fp.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Molecule/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Reaction/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Reaction/balance_check.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Reaction/cleanning.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Reaction/deionize.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Reaction/fix_aam.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Reaction/neutralize.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Reaction/rsmi_utils.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Reaction/standardize.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/Reaction/tautomerize.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Chem/utils.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Cluster/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Cluster/batch_cluster.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Cluster/graph_cluster.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Cluster/graph_morphism.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Cluster/rule_morphism.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Context/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Context/hier_context.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Context/radius_expand.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Feature/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Feature/graph_descriptors.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Feature/graph_fps.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Feature/graph_signature.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Feature/hash_fps.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Feature/morgan_fps.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Feature/path_fps.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Feature/wl_hash.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Hyrogen/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Hyrogen/_misc.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Hyrogen/hcomplete.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/Hyrogen/hextend.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/ITS/aam_utils.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/ITS/aam_validator.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/ITS/its_builder.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/ITS/its_construction.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/ITS/its_decompose.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/ITS/its_expand.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/ITS/normalize_aam.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Graph/__init.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/IO/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/IO/chem_converter.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/IO/data_io.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/IO/data_process.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/IO/debug.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/IO/dg_to_gml.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/IO/gml_to_nx.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/IO/graph_to_mol.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/IO/mol_to_graph.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/IO/nx_to_gml.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/IO/smiles_to_id.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Metrics/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Metrics/_base.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Metrics/_plot.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Metrics/_ranking.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Reactor/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Reactor/core_engine.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Reactor/crn.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Reactor/dcrn.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Reactor/multi_steps.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Reactor/path_finder.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Reactor/reactor_engine.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Reactor/reactor_utils.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Reactor/single_predictor.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Rule/MaxValence.json.gz +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Rule/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Rule/longest_path.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Rule/molecule_rule.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Rule/prune_templates.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Rule/reactor_rule.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Rule/retro_reactor.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Rule/rule_compose.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Rule/rule_rbl.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Rule/rule_utils.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Rule/valence_constrain.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Utils/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Utils/utils.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Vis/__init__.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Vis/chemical_graph_visualizer.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Vis/chemical_reaction_visualizer.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Vis/chemical_space.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Vis/dpo_vis.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Vis/embedding.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Vis/pdf_writer.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/Vis/rsmi_to_fig.py +0 -0
- {synkit-0.0.5 → synkit-0.0.6}/synkit/__init__.py +0 -0
|
@@ -88,7 +88,9 @@ def remove_explicit_hydrogens(mol: Chem.Mol) -> Chem.Mol:
|
|
|
88
88
|
return Chem.RemoveHs(mol)
|
|
89
89
|
|
|
90
90
|
|
|
91
|
-
def remove_radicals_and_add_hydrogens(
|
|
91
|
+
def remove_radicals_and_add_hydrogens(
|
|
92
|
+
mol: Chem.Mol, removeH=True
|
|
93
|
+
) -> Optional[Chem.Mol]:
|
|
92
94
|
"""
|
|
93
95
|
Remove radicals from a molecule by setting radical electrons to zero and adding hydrogens where needed.
|
|
94
96
|
|
|
@@ -98,7 +100,7 @@ def remove_radicals_and_add_hydrogens(mol: Chem.Mol) -> Optional[Chem.Mol]:
|
|
|
98
100
|
Returns:
|
|
99
101
|
- Chem.Mol: Mol object with radicals removed and necessary hydrogens added.
|
|
100
102
|
"""
|
|
101
|
-
mol = Chem.RemoveHs(mol) # Remove explicit hydrogens first
|
|
103
|
+
# mol = Chem.RemoveHs(mol) # Remove explicit hydrogens first
|
|
102
104
|
for atom in mol.GetAtoms():
|
|
103
105
|
if atom.GetNumRadicalElectrons() > 0:
|
|
104
106
|
atom.SetNumExplicitHs(
|
|
@@ -106,7 +108,10 @@ def remove_radicals_and_add_hydrogens(mol: Chem.Mol) -> Optional[Chem.Mol]:
|
|
|
106
108
|
)
|
|
107
109
|
atom.SetNumRadicalElectrons(0)
|
|
108
110
|
mol = rdmolops.AddHs(mol) # Add hydrogens back
|
|
109
|
-
|
|
111
|
+
if removeH:
|
|
112
|
+
return remove_explicit_hydrogens(mol)
|
|
113
|
+
else:
|
|
114
|
+
return mol
|
|
110
115
|
|
|
111
116
|
|
|
112
117
|
def remove_isotopes(mol: Chem.Mol) -> Chem.Mol:
|
|
@@ -138,7 +143,7 @@ def clear_stereochemistry(mol: Chem.Mol) -> Chem.Mol:
|
|
|
138
143
|
return mol
|
|
139
144
|
|
|
140
145
|
|
|
141
|
-
def fix_radical_rsmi(rsmi: str) -> str:
|
|
146
|
+
def fix_radical_rsmi(rsmi: str, removeH=True) -> str:
|
|
142
147
|
"""
|
|
143
148
|
Takes a reaction SMILES string with potential radicals and returns a new reaction SMILES string
|
|
144
149
|
where all radicals have been replaced by adding hydrogen atoms.
|
|
@@ -150,12 +155,13 @@ def fix_radical_rsmi(rsmi: str) -> str:
|
|
|
150
155
|
- str: A reaction SMILES string with radicals replaced by hydrogen atoms.
|
|
151
156
|
"""
|
|
152
157
|
r, p = rsmi.split(">>")
|
|
153
|
-
r_mol = Chem.MolFromSmiles(r)
|
|
154
|
-
p_mol = Chem.MolFromSmiles(p)
|
|
155
|
-
|
|
158
|
+
r_mol = Chem.MolFromSmiles(r, sanitize=False)
|
|
159
|
+
p_mol = Chem.MolFromSmiles(p, sanitize=False)
|
|
160
|
+
Chem.SanitizeMol(r_mol)
|
|
161
|
+
Chem.SanitizeMol(p_mol)
|
|
156
162
|
if r_mol is not None and p_mol is not None:
|
|
157
|
-
r_mol = remove_radicals_and_add_hydrogens(r_mol)
|
|
158
|
-
p_mol = remove_radicals_and_add_hydrogens(p_mol)
|
|
163
|
+
r_mol = remove_radicals_and_add_hydrogens(r_mol, removeH)
|
|
164
|
+
p_mol = remove_radicals_and_add_hydrogens(p_mol, removeH)
|
|
159
165
|
|
|
160
166
|
r_smiles = Chem.MolToSmiles(r_mol) if r_mol else r
|
|
161
167
|
p_smiles = Chem.MolToSmiles(p_mol) if p_mol else p
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
from typing import List, Set, Any, Dict, Optional
|
|
2
|
+
from synkit.IO.chem_converter import gml_to_smart, smart_to_gml
|
|
3
|
+
from synkit.Rule.rule_utils import _increment_gml_ids
|
|
4
|
+
from synkit.Chem.Reaction.standardize import Standardize
|
|
5
|
+
from synkit.Chem.Reaction.cleanning import Cleanning
|
|
6
|
+
from synkit.Chem.Reaction.rsmi_utils import find_longest_fragment
|
|
7
|
+
from mod import RCMatch, ruleGMLString
|
|
8
|
+
from synkit.Reactor.core_engine import CoreEngine
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ComposeRule:
    """
    Utilities for composing two reaction rules and selecting the composed
    candidates that reproduce a reference reaction.

    The rule objects are created with ``mod.ruleGMLString`` and combined
    through ``mod.RCMatch``; SMARTS/GML conversion is delegated to
    ``synkit.IO.chem_converter``.
    """

    @staticmethod
    def filter_smallest_vertex(combo: List[object]) -> List[object]:
        """
        Keep only the elements whose context has the fewest vertices.

        Parameters:
        - combo (List[object]): A list of objects, each with a 'context'
        attribute that has a 'numVertices' attribute.

        Returns:
        - List[object]: The objects (in input order) whose
        ``context.numVertices`` equals the minimum over the input.
        An empty input yields an empty list.
        """
        num_vertices = [rule.context.numVertices for rule in combo]
        # Guard: min() raises ValueError on an empty sequence, which happens
        # when no candidate rule survived the upstream filtering.
        if not num_vertices:
            return []
        min_vertex = min(num_vertices)

        return [
            rule
            for rule, vertices in zip(combo, num_vertices)
            if vertices == min_vertex
        ]

    @staticmethod
    def rule_cluster(graphs: List[Any]) -> List[Any]:
        """
        Cluster graphs by isomorphism and return one representative per cluster.

        Parameters:
        - graphs (List[Any]): A list of graph objects exposing an
        ``isomorphism(other)`` method.

        Returns:
        - List[Any]: One graph per cluster, in order of first appearance.
        The representative is always the first member of its cluster
        (the lowest index), so the result is deterministic.
        """
        visited: Set[int] = set()
        representatives: List[Any] = []

        for i, graph_i in enumerate(graphs):
            if i in visited:
                continue
            # graph_i opens a new cluster and serves as its representative.
            visited.add(i)
            representatives.append(graph_i)
            for j, graph_j in enumerate(graphs):
                if j <= i or j in visited:
                    continue
                # Assuming isomorphism() returns 1 for isomorphic graphs.
                if graph_i.isomorphism(graph_j) == 1:
                    visited.add(j)

        return representatives

    @staticmethod
    def _compose_mapping(
        rule_1: str, rule_2: str, mapping: Dict[int, int], return_string: bool = True
    ) -> Any:
        """
        Compose two rule graphs from their GML representations using a mapping between external IDs.

        Parameters:
        - rule_1 (str): The GML representation for the first rule.
        - rule_2 (str): The GML representation for the second rule.
        - mapping (Dict[int, int]): A dictionary mapping external IDs in the first rule (child side)
        to corresponding external IDs in the second rule (parent side).
        - return_string (bool): If True, returns the composed rule as a GML string.

        Returns:
        - Any: The composed rule object or its GML string if return_string is True.
        """
        # Create rule objects from the GML inputs.
        r1 = ruleGMLString(rule_1)
        r2 = ruleGMLString(rule_2)

        # Create an RCMatch object with r1 and r2.
        m = RCMatch(r1, r2)

        # Align the right side of each mapped vertex in r1 with the left side
        # of its counterpart in r2.
        for child_ext_id, parent_ext_id in mapping.items():
            v1 = r1.getVertexFromExternalId(child_ext_id)
            v2 = r2.getVertexFromExternalId(parent_ext_id)
            m.push(v1.right, v2.left)

        # Compose the rules under the pushed alignment.
        composed_rule = m.compose()
        if return_string:
            composed_rule = composed_rule.getGMLString()
        return composed_rule

    @staticmethod
    def _compose(rule_1: str, rule_2: str, return_string: bool = True) -> List[Any]:
        """
        Compose two rules with ``RCMatch.composeAll`` and de-duplicate the results.

        Parameters:
        - rule_1 (str): The first rule (in GML format) to compose.
        - rule_2 (str): The second rule (in GML format) to compose.
        - return_string (bool): If True, returns the composed rules as GML strings.

        Returns:
        - List[Any]: One composed rule per isomorphism class (either as rule
        objects or as GML strings). Returns an empty list if an error occurs.
        """
        # Deliberate best-effort boundary: any failure is reported on stdout
        # and turned into an empty result rather than propagated.
        try:
            m = RCMatch(
                ruleGMLString(rule_1, add=False), ruleGMLString(rule_2, add=False)
            )
            modRes = m.composeAll()
            modRes = ComposeRule.rule_cluster(modRes)
            if return_string:
                modRes = [i.getGMLString() for i in modRes]
            return modRes
        except Exception as e:
            print("Error during rule composition:", e)
            return []

    @staticmethod
    def _get_valid_rule(rules: List[str], format: str = "gml") -> List[str]:
        """
        Validate and convert a list of rule GML strings to either SMARTS or GML format.

        A rule is kept when the first element returned by ``gml_to_smart``
        does not contain the substring "Error".

        Parameters:
        - rules (List[str]): A list of rule GML strings.
        - format (str): The output format. 'smart' returns SMARTS strings; otherwise, returns GML strings.

        Returns:
        - List[str]: A list of valid rules in the desired format.
        """
        new_rules: List[str] = []
        for value in rules:
            new = gml_to_smart(value, sanitize=True, explicit_hydrogen=False)[0]
            if "Error" in new:
                continue
            if format == "smart":
                new_rules.append(new)
            else:
                # Round-trip through SMARTS back to GML.
                new_rules.append(
                    smart_to_gml(new, sanitize=True, explicit_hydrogen=False)
                )
        return new_rules

    @staticmethod
    def _get_comp_reaction(smart_1: str, smart_2: str) -> str:
        """
        Compute a representative reaction SMILES for the composed rule from two SMARTS strings.

        Both inputs are passed through ``Standardize().fit``; their reactant
        and product sides are concatenated with '.' and the combined
        reaction is passed through ``fit`` once more.

        Parameters:
        - smart_1 (str): The first reaction in SMARTS notation.
        - smart_2 (str): The second reaction in SMARTS notation.

        Returns:
        - str: A standardized reaction SMILES representing the composition.
        """
        std = Standardize()
        rsmi_1 = std.fit(smart_1)
        rsmi_2 = std.fit(smart_2)
        r1, p1 = rsmi_1.split(">>")
        r2, p2 = rsmi_2.split(">>")
        new_rsmi = std.fit(f"{r1}.{r2}>>{p1}.{p2}")
        return new_rsmi

    def get_rule_comp(self, smart_1: str, smart_2: str) -> List[str]:
        """
        Compose two reaction SMARTS strings into rules (GML format) that reproduce a reference reaction.

        Parameters:
        - smart_1 (str): The first reaction in SMARTS notation.
        - smart_2 (str): The second reaction in SMARTS notation.

        Returns:
        - List[str]: GML strings of the composed rules that (a) yield the
        longest product fragment of the reference reaction when applied to
        its reactants and (b) have the smallest context among such rules.
        Empty when no candidate qualifies.
        """
        rule_1 = smart_to_gml(smart_1, sanitize=True, explicit_hydrogen=False)
        rule_2 = smart_to_gml(smart_2, sanitize=True, explicit_hydrogen=False)
        reference_rsmi = self._get_comp_reaction(smart_1, smart_2)
        candidate_rules = self._compose(rule_1, rule_2, return_string=True)
        candidate_rules = [_increment_gml_ids(value) for value in candidate_rules]
        initial_smiles = reference_rsmi.split(">>")[0].split(".")
        largest_prod = find_longest_fragment(reference_rsmi.split(">>")[1].split("."))
        cds = []
        for candidate in candidate_rules:
            inferred_rsmi = CoreEngine()._inference(candidate, initial_smiles)
            inferred_rsmi = Cleanning.clean_smiles(inferred_rsmi)
            # One list of product fragments per inferred reaction.
            inferred_prod = [i.split(">>")[1].split(".") for i in inferred_rsmi]
            if any(largest_prod in smi for smi in inferred_prod):
                cds.append(candidate)

        # Rebuild rule objects so candidates can be ranked by context size;
        # filter_smallest_vertex tolerates an empty candidate list.
        cds = [ruleGMLString(i) for i in cds]
        cds = self.filter_smallest_vertex(cds)
        cds = [i.getGMLString() for i in cds]

        return cds
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import networkx as nx
|
|
3
|
+
from operator import eq
|
|
4
|
+
from typing import Dict, List, Optional, Set, Tuple, Any
|
|
5
|
+
from networkx.algorithms.isomorphism import generic_node_match, generic_edge_match
|
|
6
|
+
from synkit.IO.chem_converter import gml_to_its
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class RuleMapping:
|
|
10
|
+
@staticmethod
|
|
11
|
+
def enumerate_all_unique_mappings(
    child: nx.Graph, parent: nx.Graph
) -> List[Dict[Any, Any]]:
    """
    Enumerate every injective, element-preserving assignment of child nodes to parent nodes.

    A mapping is included when:
    - every child node is assigned exactly one parent node,
    - the assigned parent node carries the same 'element' attribute,
    - no parent node is used twice within the mapping.

    Parameters:
    - child (nx.Graph): Graph whose nodes are to be assigned.
    - parent (nx.Graph): Graph supplying the candidate target nodes.

    Returns:
    - List[dict]: One dictionary (child node -> parent node) per valid
    assignment; an empty list when no complete assignment exists.
    """

    def same_element_parents(element: Any) -> List[Any]:
        # Parent nodes (in parent iteration order) sharing the given element.
        return [
            p_node
            for p_node, p_data in parent.nodes(data=True)
            if p_data.get("element") == element
        ]

    options: Dict[Any, List[Any]] = {
        c_node: same_element_parents(c_data.get("element"))
        for c_node, c_data in child.nodes(data=True)
    }
    order = list(child.nodes())
    found: List[Dict[Any, Any]] = []

    def extend(depth: int, partial: Dict[Any, Any], taken: Set[Any]) -> None:
        # A full-depth partial assignment is complete: record a snapshot.
        if depth == len(order):
            found.append(dict(partial))
            return

        current = order[depth]
        for option in options.get(current, []):
            if option in taken:
                continue
            # Choose, recurse, then undo the choice before trying the next one.
            partial[current] = option
            taken.add(option)
            extend(depth + 1, partial, taken)
            taken.remove(option)
            del partial[current]

    extend(0, {}, set())
    return found
|
|
64
|
+
|
|
65
|
+
@staticmethod
|
|
66
|
+
def standardize_order(
    order_tuple: Tuple[float, ...],
) -> Optional[Tuple[float, ...]]:
    """
    Shift an order tuple upward in steps of 1 until it has no negative entry.

    Every element is incremented together, so differences between elements
    are preserved. A result consisting only of zeros (including the empty
    tuple) is reported as None, signalling that the edge should be dropped.

    Examples:
    (-1.0, 0.0)  -> (0.0, 1.0)
    (-2.0, -1.0) -> (-1.0, 0.0) -> (0.0, 1.0)
    (0.0, 0.0)   -> None

    Parameters:
    - order_tuple (Tuple[float, ...]): The order attribute (tuple of floats).

    Returns:
    - Optional[Tuple[float, ...]]: The shifted tuple, or None if it is all zeros.
    """
    shifted = tuple(order_tuple)
    # Raise all entries by one per pass while any entry is still below zero.
    while any(value < 0 for value in shifted):
        shifted = tuple(value + 1 for value in shifted)

    has_nonzero = any(value != 0 for value in shifted)
    if not has_nonzero:
        return None
    return shifted
|
|
90
|
+
|
|
91
|
+
@staticmethod
|
|
92
|
+
def keep_largest_component(graph: nx.Graph) -> nx.Graph:
    """
    Extract the largest connected component of an undirected graph.

    Parameters:
    - graph (nx.Graph): The graph to reduce.

    Returns:
    - nx.Graph: An independent copy of the subgraph induced by the connected
    component with the most nodes. A graph without nodes is returned
    unchanged (the same object, not a copy).
    """
    has_nodes = graph.number_of_nodes() != 0
    if not has_nodes:
        return graph

    # max() keeps the first component among those of equal (largest) size.
    biggest_nodes = max(list(nx.connected_components(graph)), key=len)
    induced = graph.subgraph(biggest_nodes)
    # copy() detaches the result from the original graph's view.
    return induced.copy()
|
|
110
|
+
|
|
111
|
+
@staticmethod
def subtract_parent_from_child(
    child: nx.Graph, parent: nx.Graph, mapping: Dict[Any, Any]
) -> nx.Graph:
    """
    Build a graph whose edge attributes are (parent - child), expressed in the
    parent's node IDs, and reduce it to its largest connected component.

    Steps:
    1. Deep-copy the parent graph and strip all of its edges (nodes and node
       attributes are kept).
    2. Collect the union of the parent's edges and the child's edges translated
       through `mapping`; child edges with an unmapped endpoint are ignored.
    3. For every edge in the union:
       - standard_order = parent's standard_order - child's standard_order
         (a missing value counts as 0).
       - order = element-wise (parent order - child order), where a side that
         lacks 'order' is treated as a zero tuple of matching length, then
         passed through standardize_order().
       - The edge is skipped when neither side has an 'order', when the two
         orders are not tuples of equal length, or when standardize_order()
         returns None.
    4. Add each surviving edge to the new graph.
    5. Keep only the largest connected component.

    Parameters:
    - child (nx.Graph): The child graph (provides edge attributes to subtract).
    - parent (nx.Graph): The parent graph (provides baseline edge/node attributes).
    - mapping (Dict[Any, Any]): A mapping from child nodes to parent nodes.

    Returns:
    - nx.Graph: A new graph (deep copy of parent, with edges recomputed),
      reduced to its largest connected component.
    """
    # Start from the parent's nodes only; edges are rebuilt below.
    result = copy.deepcopy(parent)
    result.remove_edges_from(list(result.edges()))

    # (u, v) in parent node IDs -> {"parent": attrs, "child": attrs}.
    # Endpoints are ordered by their string form so that (u, v) and (v, u)
    # collapse onto the same key even for node IDs of mixed types.
    paired: Dict[Tuple[Any, Any], Dict[str, Dict[str, Any]]] = {}

    for a, b, attrs in parent.edges(data=True):
        paired.setdefault(tuple(sorted([a, b], key=str)), {})["parent"] = attrs

    for a, b, attrs in child.edges(data=True):
        mapped_a = mapping.get(a)
        mapped_b = mapping.get(b)
        if mapped_a is None or mapped_b is None:
            # Child edge touches a node with no counterpart in the parent.
            continue
        edge_key = tuple(sorted([mapped_a, mapped_b], key=str))
        paired.setdefault(edge_key, {})["child"] = attrs

    for (u, v), sides in paired.items():
        from_parent = sides.get("parent", {})
        from_child = sides.get("child", {})

        so_delta = from_parent.get("standard_order", 0) - from_child.get(
            "standard_order", 0
        )

        p_order = from_parent.get("order", None)
        c_order = from_child.get("order", None)
        if p_order is None and c_order is None:
            # Without any 'order' information the edge is not emitted.
            continue

        # Fill the missing side with zeros so the subtraction is well defined.
        if p_order is None:
            p_order = tuple(0 for _ in c_order)
        elif c_order is None:
            c_order = tuple(0 for _ in p_order)

        comparable = (
            isinstance(p_order, tuple)
            and isinstance(c_order, tuple)
            and len(p_order) == len(c_order)
        )
        if not comparable:
            continue

        new_order = RuleMapping.standardize_order(
            tuple(p - c for p, c in zip(p_order, c_order))
        )
        if new_order is None:
            # standardize_order() signalled that this edge should be dropped.
            continue

        result.add_edge(u, v, standard_order=so_delta, order=new_order)

    return RuleMapping.keep_largest_component(result)
|
|
205
|
+
|
|
206
|
+
@staticmethod
def graph_alignment(
    child: nx.Graph,
    parent: nx.Graph,
    node_label_names: List[str] = ["element"],
    node_label_default: List[str] = ["*"],
    edge_attribute: str = "standard_order",
) -> Tuple[bool, Optional[Dict[Any, Any]]]:
    """
    Test whether `child` and `parent` are isomorphic under attribute-aware matching
    and, if so, report the node mapping found by the matcher.

    Nodes are compared by equality on each attribute in `node_label_names`
    (falling back to the corresponding entry of `node_label_default` when an
    attribute is missing). Edges are compared by equality on `edge_attribute`,
    with 1 used when the attribute is missing.

    Parameters:
    - child (nx.Graph): The child graph to align.
    - parent (nx.Graph): The parent graph to align with.
    - node_label_names (List[str]): Node attribute names for matching (default: ["element"]).
    - node_label_default (List[str]): Default values for those attributes if missing (default: ["*"]).
    - edge_attribute (str): The edge attribute to match (default: "standard_order").

    Returns:
    - Tuple[bool, Optional[Dict[Any, Any]]]:
        A tuple (is_iso, mapping):
        - is_iso (bool): True if the graphs are isomorphic; otherwise, False.
        - mapping (dict or None): The child→parent node mapping if isomorphic, else None.
    """
    # NOTE: the list defaults above are only read in this function, never
    # mutated, so sharing them across calls is harmless here.
    comparators = [eq] * len(node_label_names)

    matcher = nx.algorithms.isomorphism.GraphMatcher(
        child,
        parent,
        node_match=generic_node_match(
            node_label_names, node_label_default, comparators
        ),
        edge_match=generic_edge_match(edge_attribute, 1, eq),
    )

    found = matcher.is_isomorphic()
    if not found:
        return found, None
    # The matcher was built as (child, parent), so its mapping is child→parent.
    return found, matcher.mapping
|
|
241
|
+
|
|
242
|
+
@staticmethod
|
|
243
|
+
def get_child1_to_child2_mapping(
|
|
244
|
+
mapping_child1_to_parent: Dict[Any, Any],
|
|
245
|
+
mapping_child2_to_parent: Dict[Any, Any],
|
|
246
|
+
) -> Dict[Any, Optional[Any]]:
|
|
247
|
+
"""
|
|
248
|
+
Build a mapping from Child1 to Child2 using each child's mapping to a common Parent.
|
|
249
|
+
|
|
250
|
+
If a Parent node in Child1's mapping is not in Child2's inverted mapping,
|
|
251
|
+
that Child1 node will map to None.
|
|
252
|
+
|
|
253
|
+
Parameters:
|
|
254
|
+
- mapping_child1_to_parent (dict): Mapping from Child1 nodes → Parent nodes.
|
|
255
|
+
- mapping_child2_to_parent (dict): Mapping from Child2 nodes → Parent nodes.
|
|
256
|
+
|
|
257
|
+
Returns:
|
|
258
|
+
- dict: A dictionary from Child1 node → Child2 node based on the shared Parent node.
|
|
259
|
+
"""
|
|
260
|
+
# Invert Child2→Parent to get Parent→Child2
|
|
261
|
+
inverted_child2 = {
|
|
262
|
+
parent_node: child2_node
|
|
263
|
+
for child2_node, parent_node in mapping_child2_to_parent.items()
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
# Build Child1→Child2 by looking up each Parent node in the inverted Child2 mapping
|
|
267
|
+
mapping_child1_to_child2: Dict[Any, Optional[Any]] = {}
|
|
268
|
+
for child1_node, parent_node in mapping_child1_to_parent.items():
|
|
269
|
+
mapping_child1_to_child2[child1_node] = inverted_child2.get(
|
|
270
|
+
parent_node, None
|
|
271
|
+
)
|
|
272
|
+
mapping_child1_to_child2 = {
|
|
273
|
+
key: value
|
|
274
|
+
for key, value in mapping_child1_to_child2.items()
|
|
275
|
+
if value is not None
|
|
276
|
+
}
|
|
277
|
+
return mapping_child1_to_child2
|
|
278
|
+
|
|
279
|
+
def fit(self, rule_1: str, rule_2: str, comp_rule: str) -> Optional[Dict[Any, Any]]:
    """
    Recover a node mapping between two rules from their composite rule.

    Workflow:
    1. Convert the three GML rules into graphs via gml_to_its.
    2. Enumerate candidate mappings from rule_2 into comp_rule.
    3. For each candidate, subtract rule_2 from comp_rule under that mapping.
    4. If rule_1 is isomorphic to the remaining graph, combine the two mappings
       through the shared composite-rule node IDs and return the result.
       The first candidate that succeeds is used.

    Parameters:
    - rule_1 (str): GML representation of the first rule.
    - rule_2 (str): GML representation of the second rule.
    - comp_rule (str): GML representation of a composite rule.

    Returns:
    - Optional[dict]: A dictionary mapping rule_1's nodes to rule_2's nodes
      (only nodes that share a composite-rule node appear) if an alignment is found.
      Returns None otherwise.
    """
    first_its = gml_to_its(rule_1)
    second_its = gml_to_its(rule_2)
    composite_its = gml_to_its(comp_rule)

    # Each candidate is used as a rule_2 → comp_rule node mapping.
    candidates = self.enumerate_all_unique_mappings(second_its, composite_its)
    for candidate in candidates:
        # What is left of the composite once rule_2's contribution is removed.
        remainder = self.subtract_parent_from_child(
            second_its, composite_its, candidate
        )
        matched, first_to_remainder = self.graph_alignment(first_its, remainder)
        if not matched or first_to_remainder is None:
            continue
        # Both mappings target composite-rule node IDs, so they can be chained.
        return self.get_child1_to_child2_mapping(first_to_remainder, candidate)

    return None
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from typing import List, Dict, Optional
|
|
2
|
+
from synkit.IO.chem_converter import smart_to_gml
|
|
3
|
+
from synkit.Rule.compose_rule import ComposeRule
|
|
4
|
+
from synkit.Rule.rule_mapping import RuleMapping
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SeqComp:
    """
    Compute pairwise node mappings along a sequence of reaction rules.

    Given reaction SMARTS strings, each one is converted to GML, a composite
    rule is built for every consecutive pair, and RuleMapping is used to map
    the first rule of the pair onto the second.
    """

    def __init__(self) -> None:
        """
        Initialize an instance of the SeqComp class (no state is stored).
        """

    @staticmethod
    def sequence_map(smarts: List[str]) -> Dict[str, Optional[dict]]:
        """
        Generate pairwise mapping dictionaries between consecutive reaction SMARTS strings.

        For each consecutive pair (i, i+1):
        1. Both SMARTS strings are converted to GML (done once up front for the whole list).
        2. Candidate composite rules are obtained from ComposeRule().get_rule_comp().
        3. If at least one candidate exists, the first one is passed together with
           the two GML rules to RuleMapping().fit().
        4. The outcome is stored under the key "i:i+1".

        Any exception raised while fitting a pair is printed and the pair is
        recorded as None, so one failing pair does not abort the sequence.

        Parameters:
        - smarts (List[str]): The list of reaction SMARTS strings.

        Returns:
        - Dict[str, Optional[dict]]:
            A dictionary keyed by "i:i+1". Each value is the mapping returned by
            RuleMapping().fit() for that pair, or None if no candidate rule was
            produced, fitting failed, or no mapping was found. Fewer than two
            input strings yield an empty dictionary.
        """
        gml_list = [smart_to_gml(s, sanitize=True) for s in smarts]
        mappings: Dict[str, Optional[dict]] = {}

        # Walk the rules as overlapping (current, next) pairs.
        for i, (rule_a, rule_b) in enumerate(zip(gml_list, gml_list[1:])):
            smart_a, smart_b = smarts[i], smarts[i + 1]
            candidate_rules = ComposeRule().get_rule_comp(smart_a, smart_b)

            mapping_result = None
            if candidate_rules:
                # Only the first candidate composite rule is tried.
                try:
                    mapping_result = RuleMapping().fit(
                        rule_a, rule_b, candidate_rules[0]
                    )
                except Exception as e:
                    print(f"Error computing mapping for pair {i}:{i+1}: {e}")
                    mapping_result = None

            mappings[f"{i}:{i+1}"] = mapping_result

        return mappings
|
|
@@ -31,7 +31,7 @@ def filter_context(context_lines, left_edges):
|
|
|
31
31
|
return filtered_context
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
def strip_context(gml_text: str, remove_all: bool =
|
|
34
|
+
def strip_context(gml_text: str, remove_all: bool = False) -> str:
|
|
35
35
|
"""
|
|
36
36
|
Filters or clears the 'context' section of GML-like content based on the remove_all flag.
|
|
37
37
|
If remove_all is True, all edges in the 'context' section are removed.
|