synkit 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. synkit-0.0.1/.github/workflows/publish-package.yml +49 -0
  2. synkit-0.0.1/.github/workflows/test-and-lint.yml +55 -0
  3. synkit-0.0.1/.gitignore +10 -0
  4. synkit-0.0.1/CHANGELOG.md +0 -0
  5. synkit-0.0.1/Data/Figure/synkit.png +0 -0
  6. synkit-0.0.1/Data/Testcase/mech.json.gz +1 -0
  7. synkit-0.0.1/LICENSE +21 -0
  8. synkit-0.0.1/PKG-INFO +148 -0
  9. synkit-0.0.1/README.md +119 -0
  10. synkit-0.0.1/Test/Chem/Fingerprint/__init__.py +0 -0
  11. synkit-0.0.1/Test/Chem/Fingerprint/test_fp_calculator.py +74 -0
  12. synkit-0.0.1/Test/Chem/Fingerprint/test_smiles_featurizer.py +85 -0
  13. synkit-0.0.1/Test/Chem/Fingerprint/test_transformation_fp.py +56 -0
  14. synkit-0.0.1/Test/Chem/Molecule/__init__.py +0 -0
  15. synkit-0.0.1/Test/Chem/Molecule/test_standardize.py +90 -0
  16. synkit-0.0.1/Test/Chem/Reaction/__init__.py +0 -0
  17. synkit-0.0.1/Test/Chem/Reaction/test_balance_checker.py +120 -0
  18. synkit-0.0.1/Test/Chem/Reaction/test_cleanning.py +26 -0
  19. synkit-0.0.1/Test/Chem/Reaction/test_deionize.py +52 -0
  20. synkit-0.0.1/Test/Chem/Reaction/test_neutralize.py +160 -0
  21. synkit-0.0.1/Test/Chem/Reaction/test_reagent.py +98 -0
  22. synkit-0.0.1/Test/Chem/Reaction/test_standardize.py +58 -0
  23. synkit-0.0.1/Test/Chem/Reaction/test_tautomerize.py +63 -0
  24. synkit-0.0.1/Test/Chem/__init__.py +0 -0
  25. synkit-0.0.1/Test/Graph/Cluster/__init__.py +0 -0
  26. synkit-0.0.1/Test/Graph/Feature/__init__.py +0 -0
  27. synkit-0.0.1/Test/Graph/Feature/test_graph_descriptors.py +195 -0
  28. synkit-0.0.1/Test/Graph/Feature/test_graph_fps.py +59 -0
  29. synkit-0.0.1/Test/Graph/Feature/test_graph_signature.py +101 -0
  30. synkit-0.0.1/Test/Graph/Feature/test_hash_fps.py +52 -0
  31. synkit-0.0.1/Test/Graph/Feature/test_morgan_fps.py +39 -0
  32. synkit-0.0.1/Test/Graph/Feature/test_path_fps.py +40 -0
  33. synkit-0.0.1/Test/Graph/__init__.py +0 -0
  34. synkit-0.0.1/Test/IO/__init__.py +0 -0
  35. synkit-0.0.1/Test/IO/test_chemical_conversion.py +143 -0
  36. synkit-0.0.1/Test/IO/test_gml_to_nx.py +132 -0
  37. synkit-0.0.1/Test/IO/test_graph_to_mol.py +59 -0
  38. synkit-0.0.1/Test/IO/test_mol_to_graph.py +49 -0
  39. synkit-0.0.1/Test/IO/test_nx_to_gml.py +101 -0
  40. synkit-0.0.1/Test/ITS/__init__.py +0 -0
  41. synkit-0.0.1/Test/ITS/test_aam_validator.py +97 -0
  42. synkit-0.0.1/Test/ITS/test_its_construction.py +51 -0
  43. synkit-0.0.1/Test/ITS/test_normalize_aam.py +63 -0
  44. synkit-0.0.1/Test/Reactor/__init__.py +0 -0
  45. synkit-0.0.1/Test/Reactor/test_core_engine.py +112 -0
  46. synkit-0.0.1/Test/Reactor/test_multi_step.py +50 -0
  47. synkit-0.0.1/Test/Reactor/test_multiple_step_aam.py +67 -0
  48. synkit-0.0.1/Test/Reactor/test_reagent.py +58 -0
  49. synkit-0.0.1/Test/Vis/__init__.py +0 -0
  50. synkit-0.0.1/Test/Vis/test_embedding.py +50 -0
  51. synkit-0.0.1/Test/__init__.py +0 -0
  52. synkit-0.0.1/lint.sh +6 -0
  53. synkit-0.0.1/pyproject.toml +37 -0
  54. synkit-0.0.1/pytest.sh +3 -0
  55. synkit-0.0.1/requirements.txt +13 -0
  56. synkit-0.0.1/synkit/Chem/Fingerprint/__init__.py +0 -0
  57. synkit-0.0.1/synkit/Chem/Fingerprint/fp_calculator.py +122 -0
  58. synkit-0.0.1/synkit/Chem/Fingerprint/smiles_featurizer.py +185 -0
  59. synkit-0.0.1/synkit/Chem/Fingerprint/transformation_fp.py +79 -0
  60. synkit-0.0.1/synkit/Chem/Molecule/__init__.py +0 -0
  61. synkit-0.0.1/synkit/Chem/Molecule/standardize.py +137 -0
  62. synkit-0.0.1/synkit/Chem/Reaction/__init__.py +0 -0
  63. synkit-0.0.1/synkit/Chem/Reaction/balance_check.py +162 -0
  64. synkit-0.0.1/synkit/Chem/Reaction/cleanning.py +59 -0
  65. synkit-0.0.1/synkit/Chem/Reaction/deionize.py +289 -0
  66. synkit-0.0.1/synkit/Chem/Reaction/neutralize.py +256 -0
  67. synkit-0.0.1/synkit/Chem/Reaction/reagent.py +102 -0
  68. synkit-0.0.1/synkit/Chem/Reaction/standardize.py +157 -0
  69. synkit-0.0.1/synkit/Chem/Reaction/tautomerize.py +168 -0
  70. synkit-0.0.1/synkit/Graph/Cluster/__init__.py +0 -0
  71. synkit-0.0.1/synkit/Graph/Cluster/morphism.py +83 -0
  72. synkit-0.0.1/synkit/Graph/Feature/__init__.py +0 -0
  73. synkit-0.0.1/synkit/Graph/Feature/graph_descriptors.py +325 -0
  74. synkit-0.0.1/synkit/Graph/Feature/graph_fps.py +97 -0
  75. synkit-0.0.1/synkit/Graph/Feature/graph_signature.py +236 -0
  76. synkit-0.0.1/synkit/Graph/Feature/hash_fps.py +130 -0
  77. synkit-0.0.1/synkit/Graph/Feature/morgan_fps.py +87 -0
  78. synkit-0.0.1/synkit/Graph/Feature/path_fps.py +82 -0
  79. synkit-0.0.1/synkit/Graph/__init.py +0 -0
  80. synkit-0.0.1/synkit/IO/__init__.py +0 -0
  81. synkit-0.0.1/synkit/IO/chem_converter.py +231 -0
  82. synkit-0.0.1/synkit/IO/data_io.py +277 -0
  83. synkit-0.0.1/synkit/IO/data_process.py +49 -0
  84. synkit-0.0.1/synkit/IO/debug.py +78 -0
  85. synkit-0.0.1/synkit/IO/dg_to_gml.py +124 -0
  86. synkit-0.0.1/synkit/IO/gml_to_nx.py +119 -0
  87. synkit-0.0.1/synkit/IO/graph_to_mol.py +110 -0
  88. synkit-0.0.1/synkit/IO/mol_to_graph.py +282 -0
  89. synkit-0.0.1/synkit/IO/nx_to_gml.py +200 -0
  90. synkit-0.0.1/synkit/IO/parse_rule.py +172 -0
  91. synkit-0.0.1/synkit/IO/smiles_to_id.py +119 -0
  92. synkit-0.0.1/synkit/ITS/_misc.py +280 -0
  93. synkit-0.0.1/synkit/ITS/aam_validator.py +254 -0
  94. synkit-0.0.1/synkit/ITS/its_builder.py +94 -0
  95. synkit-0.0.1/synkit/ITS/its_construction.py +213 -0
  96. synkit-0.0.1/synkit/ITS/normalize_aam.py +183 -0
  97. synkit-0.0.1/synkit/ITS/partial_expand.py +170 -0
  98. synkit-0.0.1/synkit/Reactor/__init__.py +0 -0
  99. synkit-0.0.1/synkit/Reactor/core_engine.py +164 -0
  100. synkit-0.0.1/synkit/Reactor/inference.py +73 -0
  101. synkit-0.0.1/synkit/Reactor/multi_step.py +227 -0
  102. synkit-0.0.1/synkit/Reactor/multi_step_aam.py +82 -0
  103. synkit-0.0.1/synkit/Reactor/reagent.py +95 -0
  104. synkit-0.0.1/synkit/Reactor/rule_apply.py +81 -0
  105. synkit-0.0.1/synkit/Vis/__init__.py +0 -0
  106. synkit-0.0.1/synkit/Vis/chemical_graph_visualizer.py +378 -0
  107. synkit-0.0.1/synkit/Vis/chemical_reaction_visualizer.py +133 -0
  108. synkit-0.0.1/synkit/Vis/chemical_space.py +83 -0
  109. synkit-0.0.1/synkit/Vis/embedding.py +92 -0
  110. synkit-0.0.1/synkit/Vis/graph_visualizer.py +286 -0
  111. synkit-0.0.1/synkit/Vis/pdf_writer.py +143 -0
  112. synkit-0.0.1/synkit/Vis/rsmi_to_fig.py +169 -0
  113. synkit-0.0.1/synkit/__init__.py +0 -0
  114. synkit-0.0.1/synkit/_misc.py +181 -0
@@ -0,0 +1,49 @@
1
+ name: PyPI publish
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ contents: read
9
+
10
+ jobs:
11
+ build:
12
+ name: Build package
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - name: Checkout
16
+ uses: actions/checkout@v3
17
+ - name: Set up Python
18
+ uses: actions/setup-python@v3
19
+ with:
20
+ python-version: '3.x'
21
+ - name: Install dependencies
22
+ run: |
23
+ python -m pip install --upgrade pip
24
+ pip install build
25
+ - name: Build package
26
+ run: python -m build
27
+ - name: Upload artifact
28
+ uses: actions/upload-artifact@v4
29
+ with:
30
+ name: package
31
+ path: dist/
32
+
33
+ publish:
34
+ name: Upload release to PyPI
35
+ needs: build
36
+ runs-on: ubuntu-latest
37
+ environment:
38
+ name: pypi
39
+ url: https://pypi.org/p/synkit
40
+ permissions:
41
+ id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
42
+ steps:
43
+ - name: Download artifact
44
+ uses: actions/download-artifact@v4
45
+ with:
46
+ name: package
47
+ path: dist/
48
+ - name: Publish package to PyPI
49
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,55 @@
1
+ # This workflow will install Python dependencies, run tests and lint with a
2
+ # single version of Python
3
+ # For more information see:
4
+ # https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
5
+
6
+ name: Test & Lint
7
+
8
+ on:
9
+ push:
10
+ branches: [ "main", "dev", "maintain" ]
11
+ pull_request:
12
+ branches: [ "main" ]
13
+
14
+ permissions:
15
+ contents: read
16
+
17
+ jobs:
18
+ build:
19
+
20
+ runs-on: ubuntu-latest
21
+
22
+ steps:
23
+ - uses: actions/checkout@v3
24
+
25
+ # Set up Miniconda
26
+ - name: Set up Miniconda
27
+ uses: conda-incubator/setup-miniconda@v2
28
+ with:
29
+ auto-update-conda: true
30
+ python-version: 3.11
31
+
32
+ # Create and activate Conda environment
33
+ - name: Create and activate environment
34
+ run: |
35
+ conda create --name synkit-env python=3.11 -y
36
+ conda activate synkit-env
37
+ conda install -c jakobandersen -c conda-forge mod
38
+ pip install flake8 pytest
39
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
40
+ shell: bash -l {0}
41
+
42
+ # Lint with flake8
43
+ - name: Lint with flake8
44
+ run: |
45
+ conda activate synkit-env
46
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
47
+ ./lint.sh
48
+ shell: bash -l {0}
49
+
50
+ # Test with pytest
51
+ - name: Test with pytest
52
+ run: |
53
+ conda activate synkit-env
54
+ ./pytest.sh
55
+ shell: bash -l {0}
@@ -0,0 +1,10 @@
1
+ *.pyc
2
+ *cachedir
3
+ *.csv
4
+ */catboost_info/*
5
+ *.ipynb
6
+ *.json
7
+ test_mod.py
8
+ test_format.py
9
+ *dev_zone
10
+ *.pkl.gz
File without changes
Binary file
@@ -0,0 +1 @@
1
+ [{"R-id": "Mech-1", "reaction": "[CH3:1][CH:2]=[O:3].[CH:4]([H:7])([H:8])[CH:5]=[O:6]>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[O:3]([H:7])([H:8])", "mechanisms": [{"mech_name": "Aldol reaction (base cat)", "steps": [{"description": "Base abstracts H from substrate", "smart_string": "[CH:4]([H:7])([H:8])[CH:5]=[O:6].[*-:9]>>[CH-:4]([H:8])[CH:5]=[O:6].[*:9][H:7]", "step": 1, "step_gml": "rule [\n ruleID \"2\"\n left [\n ]\n context [\n ]\n right [\n ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Base abstracts H from substrate"}, {"description": "Nucleophilic addition fro substrate", "smart_string": "[CH3:1][CH:2]=[O:3].[CH-:4]([H:8])[CH:5]=[O:6]>>[CH3:1][CH:2]([O-:3])[CH:4]([H:8])[CH:5]=[O:6]", "step": 2, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"=\" ]\n node [ id 2 label \"O\" ]\n node [ id 3 label \"C-\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n ]\n right [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 1 target 3 label \"-\" ]\n node [ id 2 label \"O-\" ]\n node [ id 3 label \"C\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH2:1]=[O:2].[CH3-:3]>>[CH2:1]([O-:2])[CH3:3]", "step_desc": "Nucleophilic addition fro substrate"}, {"description": "Neutralize substrate", "smart_string": "[CH3:1][CH:2]([O-:3])[CH:4]([H:8])[CH:5]=[O:6].[*:9][H:7]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[*-:9]", "step": 3, "step_gml": "rule [\n ruleID \"2\"\n left [\n ]\n context [\n ]\n right [\n ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Neutralize substrate"}, {"description": "Base abstracts H from substrate", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[*-:9]>>[CH3:1][CH:2]([O:3][H:7])[CH-:4][CH:5]=[O:6].[*:9][H:8]", "step": 4, "step_gml": "rule [\n ruleID \"2\"\n left [\n ]\n context [\n ]\n right [\n ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Base abstracts H from substrate"}, {"description": "Elimination Unimolecular Conjugate Base", "smart_string": 
"[CH3:1][CH:2]([O:3][H:7])[CH-:4][CH:5]=[O:6]>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[O-:3][H:7]", "step": 5, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 1 target 3 label \"-\" ]\n node [ id 2 label \"O\" ]\n node [ id 3 label \"C-\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n ]\n right [\n edge [ source 1 target 3 label \"=\" ]\n node [ id 2 label \"O-\" ]\n node [ id 3 label \"C\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH2:1]([OH:2])[CH2-:3]>>[CH2:1]=[CH2:3].[OH-:2]", "step_desc": "Elimination Unimolecular Conjugate Base"}, {"description": "Neutralize Hydroxide", "smart_string": "[O-:3][H:7].[*:9][H:8]>>[O:3]([H:7])([H:8]).[*-:9]", "step": 6, "step_gml": "rule [\n ruleID \"2\"\n left [\n ]\n context [\n ]\n right [\n ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Neutralize Hydroxide"}], "cat": "[*-]"}, {"mech_name": "Aldol reaction (neutral cat)", "steps": [{"description": "Tautomerization of substrate", "smart_string": "[CH:4]([H:7])([H:8])[CH:5]=[O:6]>>[CH:4]([H:8])=[CH:5][O:6]([H:7])", "step": 1, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 2 target 3 label \"=\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n node [ id 2 label \"C\" ]\n node [ id 3 label \"O\" ]\n ]\n right [\n edge [ source 1 target 2 label \"=\" ]\n edge [ source 2 target 3 label \"-\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH3:1][CH:2]=[O:3]>>[CH2:1]=[CH:2][OH:3]", "step_desc": "Tautomerization of substrate"}, {"description": "Nucleophilic addition from substrate", "smart_string": "[CH3:1][CH:2]=[O:3].[CH:4]([H:8])=[CH:5][O:6]([H:7])>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6]", "step": 2, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"=\" ]\n edge [ source 3 target 4 label \"=\" ]\n edge [ source 4 target 5 label \"-\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n node [ id 2 label \"O\" ]\n node [ id 3 label 
\"C\" ]\n node [ id 4 label \"C\" ]\n node [ id 5 label \"O\" ]\n ]\n right [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 1 target 3 label \"-\" ]\n edge [ source 3 target 4 label \"-\" ]\n edge [ source 4 target 5 label \"=\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH2:1]=[O:2].[CH2:3]=[CH:4][OH:5]>>[CH2:1]([OH:2])[CH2:3][CH:4]=[O:5]", "step_desc": "Nucleophilic addition from substrate"}, {"description": "Tautomerization of substrate", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:8])", "step": 3, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 2 target 3 label \"=\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n node [ id 2 label \"C\" ]\n node [ id 3 label \"O\" ]\n ]\n right [\n edge [ source 1 target 2 label \"=\" ]\n edge [ source 2 target 3 label \"-\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH3:1][CH:2]=[O:3]>>[CH2:1]=[CH:2][OH:3]", "step_desc": "Tautomerization of substrate"}, {"description": "Elimination", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:8])>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[O:3]([H:7])([H:8])", "step": 4, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 1 target 3 label \"-\" ]\n edge [ source 3 target 4 label \"=\" ]\n edge [ source 4 target 5 label \"-\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n node [ id 2 label \"O\" ]\n node [ id 3 label \"C\" ]\n node [ id 4 label \"C\" ]\n node [ id 5 label \"O\" ]\n ]\n right [\n edge [ source 1 target 3 label \"=\" ]\n edge [ source 3 target 4 label \"-\" ]\n edge [ source 4 target 5 label \"=\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH2:1]([OH:2])[CH:3]=[CH:4][OH:5]>>[CH2:1]=[CH:3][CH:4]=[O:5].[OH2:2]", "step_desc": "Elimination"}], "cat": ""}, {"mech_name": "Aldol reaction (acid cat)", "steps": [{"description": "Tautomerization of substrate with acid cat", 
"smart_string": "[CH:4]([H:7])([H:8])[CH:5]=[O:6].[H+:9]>>[CH:4]([H:8])=[CH:5][O:6]([H:9]).[H+:7]", "step": 1}, {"description": "Nucleophilic addition from substrate", "smart_string": "[CH3:1][CH:2]=[O:3].[CH:4]([H:8])=[CH:5][O:6]([H:9]).[H+:7]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[H+:9]", "step": 2}, {"description": "Tautomerization of substrate with acid cat", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[H+:9]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:9]).[H+:8]", "step": 3}, {"description": "Elimination", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:9]).[H+:8]>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[H+:9].[O:3]([H:7])([H:8])", "step": 4}], "cat": "[H+]"}]}]
synkit-0.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Tieu Long Phan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
synkit-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,148 @@
1
+ Metadata-Version: 2.4
2
+ Name: synkit
3
+ Version: 0.0.1
4
+ Summary: Utility for reaction modeling using graph grammar
5
+ Project-URL: homepage, https://github.com/TieuLongPhan/SynKit
6
+ Project-URL: source, https://github.com/TieuLongPhan/SynKit
7
+ Project-URL: issues, https://github.com/TieuLongPhan/SynKit/issues
8
+ Project-URL: documentation, https://tieulongphan.github.io/SynKit/
9
+ Author-email: Tieu Long Phan <tieu@bioinf.uni-leipzig.de>
10
+ License-File: LICENSE
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Requires-Python: >=3.11
15
+ Requires-Dist: networkx>=3.3
16
+ Requires-Dist: pandas>=1.5.3
17
+ Requires-Dist: rdkit>=2024.3.3
18
+ Requires-Dist: requests>=2.32.3
19
+ Requires-Dist: scikit-learn>=1.4.0
20
+ Requires-Dist: seaborn>=0.13.2
21
+ Provides-Extra: all
22
+ Requires-Dist: drfp==0.3.6; extra == 'all'
23
+ Requires-Dist: fgutils>=0.1.3; extra == 'all'
24
+ Requires-Dist: rxn-chem-utils==1.5.0; extra == 'all'
25
+ Requires-Dist: rxn-utils==2.0.0; extra == 'all'
26
+ Requires-Dist: rxnmapper==0.3.0; extra == 'all'
27
+ Requires-Dist: xgboost>=2.1.1; extra == 'all'
28
+ Description-Content-Type: text/markdown
29
+
30
+ # SynKit
31
+
32
+ **Toolkit for Synthesis Planning**
33
+
34
+ SynKit is a collection of tools designed to support the planning and execution of chemical synthesis.
35
+
36
+ ![SynKit](https://raw.githubusercontent.com/TieuLongPhan/SynKit/main/Data/Figure/synkit.png)
37
+
38
+ Our tools are tailored to assist researchers and chemists in navigating complex chemical reactions and synthesis pathways, leveraging the power of modern computational chemistry. Whether you're designing novel compounds or optimizing existing processes, ``synkit`` aims to provide the critical tools you need.
39
+
40
+ For more details on each utility within the repository, please refer to the documentation provided in the respective folders.
41
+
42
+ ## Step-by-Step Installation Guide
43
+
44
+ 1. **Python Installation:**
45
+ Ensure that Python 3.11 or later is installed on your system. You can download it from [python.org](https://www.python.org/downloads/).
46
+
47
+ 2. **Creating a Virtual Environment (Optional but Recommended):**
48
+ It's recommended to use a virtual environment to avoid conflicts with other projects or system-wide packages. Use the following commands to create and activate a virtual environment:
49
+
50
+ ```bash
51
+ python -m venv synkit-env
52
+ source synkit-env/bin/activate
53
+ ```
54
+ Or Conda
55
+
56
+ ```bash
57
+ conda create --name synkit-env python=3.11
58
+ conda activate synkit-env
59
+ ```
60
+
61
+ 3. **Install from PyPi:**
62
+ The easiest way to use SynKit is by installing the PyPI package
63
+ [synkit](https://pypi.org/project/synkit/).
64
+
65
+ ```
66
+ pip install synkit
67
+ ```
68
+ Optionally, install the full version with all extras:
69
+ ```
70
+ pip install synkit[all]
71
+ ```
72
+
73
+ ## For contributors
74
+
75
+ We welcome new contributors to help improve this project. Please do not hesitate to contact me via [email](mailto:tieu@bioinf.uni-leipzig.de).
76
+
77
+ Before you start, ensure your local development environment is set up correctly. Pull the latest version of the `main` branch to start with the most recent stable code.
78
+
79
+ ```bash
80
+ git checkout main
81
+ git pull
82
+ ```
83
+
84
+ ## Working on New Features
85
+
86
+ 1. **Create a New Branch**:
87
+ For every new feature or bug fix, create a new branch from the `main` branch. Name your branch meaningfully, related to the feature or fix you are working on.
88
+
89
+ ```bash
90
+ git checkout -b feature/your-feature-name
91
+ ```
92
+
93
+ 2. **Develop and Commit Changes**:
94
+ Make your changes locally, commit them to your branch. Keep your commits small and focused; each should represent a logical unit of work.
95
+
96
+ ```bash
97
+ git commit -m "Describe the change"
98
+ ```
99
+
100
+ 3. **Run Quality Checks**:
101
+ Before finalizing your feature, run the following commands to ensure your code meets our formatting standards and passes all tests:
102
+
103
+ ```bash
104
+ ./lint.sh # Check code format
105
+ pytest Test # Run tests
106
+ ```
107
+
108
+ Fix any issues or errors highlighted by these checks.
109
+
110
+ ## Integrating Changes
111
+
112
+ 1. **Rebase onto Staging**:
113
+ Once your feature is complete and tests pass, rebase your changes onto the `staging` branch to prepare for integration.
114
+
115
+ ```bash
116
+ git fetch origin
117
+ git rebase origin/staging
118
+ ```
119
+
120
+ Carefully resolve any conflicts that arise during the rebase.
121
+
122
+ 2. **Push to Your Feature Branch**:
123
+ After successfully rebasing, push your branch to the remote repository.
124
+
125
+ ```bash
126
+ git push origin feature/your-feature-name
127
+ ```
128
+
129
+ 3. **Create a Pull Request**:
130
+ Open a pull request from your feature branch to the `staging` branch. Ensure the pull request description clearly describes the changes and any additional context necessary for review.
131
+
132
+ ## Contributing
133
+ - [Tieu-Long Phan](https://tieulongphan.github.io/)
134
+ - [Klaus Weinbauer](https://github.com/klausweinbauer)
135
+ - [Phuoc-Chung Nguyen Van](https://github.com/phuocchung123)
136
+
137
+ ## Deployment timeline
138
+
139
+ We plan to release a new version quarterly.
140
+
141
+
142
+ ## License
143
+
144
+ This project is licensed under the MIT License - see the [License](LICENSE) file for details.
145
+
146
+ ## Acknowledgments
147
+
148
+ This project has received funding from the European Union's Horizon Europe Doctoral Network programme under the Marie Skłodowska-Curie grant agreement No 101072930 ([TACsy](https://tacsy.eu/) -- Training Alliance for Computational Systems Chemistry).
synkit-0.0.1/README.md ADDED
@@ -0,0 +1,119 @@
1
+ # SynKit
2
+
3
+ **Toolkit for Synthesis Planning**
4
+
5
+ SynKit is a collection of tools designed to support the planning and execution of chemical synthesis.
6
+
7
+ ![SynKit](https://raw.githubusercontent.com/TieuLongPhan/SynKit/main/Data/Figure/synkit.png)
8
+
9
+ Our tools are tailored to assist researchers and chemists in navigating complex chemical reactions and synthesis pathways, leveraging the power of modern computational chemistry. Whether you're designing novel compounds or optimizing existing processes, ``synkit`` aims to provide the critical tools you need.
10
+
11
+ For more details on each utility within the repository, please refer to the documentation provided in the respective folders.
12
+
13
+ ## Step-by-Step Installation Guide
14
+
15
+ 1. **Python Installation:**
16
+ Ensure that Python 3.11 or later is installed on your system. You can download it from [python.org](https://www.python.org/downloads/).
17
+
18
+ 2. **Creating a Virtual Environment (Optional but Recommended):**
19
+ It's recommended to use a virtual environment to avoid conflicts with other projects or system-wide packages. Use the following commands to create and activate a virtual environment:
20
+
21
+ ```bash
22
+ python -m venv synkit-env
23
+ source synkit-env/bin/activate
24
+ ```
25
+ Or Conda
26
+
27
+ ```bash
28
+ conda create --name synkit-env python=3.11
29
+ conda activate synkit-env
30
+ ```
31
+
32
+ 3. **Install from PyPi:**
33
+ The easiest way to use SynKit is by installing the PyPI package
34
+ [synkit](https://pypi.org/project/synkit/).
35
+
36
+ ```
37
+ pip install synkit
38
+ ```
39
+ Optionally, install the full version with all extras:
40
+ ```
41
+ pip install synkit[all]
42
+ ```
43
+
44
+ ## For contributors
45
+
46
+ We welcome new contributors to help improve this project. Please do not hesitate to contact me via [email](mailto:tieu@bioinf.uni-leipzig.de).
47
+
48
+ Before you start, ensure your local development environment is set up correctly. Pull the latest version of the `main` branch to start with the most recent stable code.
49
+
50
+ ```bash
51
+ git checkout main
52
+ git pull
53
+ ```
54
+
55
+ ## Working on New Features
56
+
57
+ 1. **Create a New Branch**:
58
+ For every new feature or bug fix, create a new branch from the `main` branch. Name your branch meaningfully, related to the feature or fix you are working on.
59
+
60
+ ```bash
61
+ git checkout -b feature/your-feature-name
62
+ ```
63
+
64
+ 2. **Develop and Commit Changes**:
65
+ Make your changes locally, commit them to your branch. Keep your commits small and focused; each should represent a logical unit of work.
66
+
67
+ ```bash
68
+ git commit -m "Describe the change"
69
+ ```
70
+
71
+ 3. **Run Quality Checks**:
72
+ Before finalizing your feature, run the following commands to ensure your code meets our formatting standards and passes all tests:
73
+
74
+ ```bash
75
+ ./lint.sh # Check code format
76
+ pytest Test # Run tests
77
+ ```
78
+
79
+ Fix any issues or errors highlighted by these checks.
80
+
81
+ ## Integrating Changes
82
+
83
+ 1. **Rebase onto Staging**:
84
+ Once your feature is complete and tests pass, rebase your changes onto the `staging` branch to prepare for integration.
85
+
86
+ ```bash
87
+ git fetch origin
88
+ git rebase origin/staging
89
+ ```
90
+
91
+ Carefully resolve any conflicts that arise during the rebase.
92
+
93
+ 2. **Push to Your Feature Branch**:
94
+ After successfully rebasing, push your branch to the remote repository.
95
+
96
+ ```bash
97
+ git push origin feature/your-feature-name
98
+ ```
99
+
100
+ 3. **Create a Pull Request**:
101
+ Open a pull request from your feature branch to the `staging` branch. Ensure the pull request description clearly describes the changes and any additional context necessary for review.
102
+
103
+ ## Contributing
104
+ - [Tieu-Long Phan](https://tieulongphan.github.io/)
105
+ - [Klaus Weinbauer](https://github.com/klausweinbauer)
106
+ - [Phuoc-Chung Nguyen Van](https://github.com/phuocchung123)
107
+
108
+ ## Deployment timeline
109
+
110
+ We plan to release a new version quarterly.
111
+
112
+
113
+ ## License
114
+
115
+ This project is licensed under the MIT License - see the [License](LICENSE) file for details.
116
+
117
+ ## Acknowledgments
118
+
119
+ This project has received funding from the European Union's Horizon Europe Doctoral Network programme under the Marie Skłodowska-Curie grant agreement No 101072930 ([TACsy](https://tacsy.eu/) -- Training Alliance for Computational Systems Chemistry).
File without changes
@@ -0,0 +1,74 @@
1
+ import unittest
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
+ from synkit.Chem.Fingerprint.fp_calculator import FPCalculator
6
+
7
+
8
class TestFPCalculator(unittest.TestCase):
    """Unit tests for ``FPCalculator`` configured for DRFP fingerprints."""

    def setUp(self):
        """Create a three-reaction DataFrame and a calculator bound to it."""
        # Sample data setup: reaction SMILES are split across two string
        # literals only to keep source lines short.
        self.data = pd.DataFrame(
            {
                "smiles": [
                    (
                        "C1CCCCC1.CCO.CS(=O)(=O)N1CCN(Cc2ccccc2)CC1.[OH-].[OH-].[Pd+2]"
                        + ">>CS(=O)(=O)N1CCNCC1"
                    ),
                    (
                        "CCOC(C)=O.Cc1cc([N+](=O)[O-])ccc1NC(=O)c1ccccc1.Cl[Sn]Cl.O.O.O=C([O-])O.[Na+]"
                        + ">>Cc1cc(N)ccc1NC(=O)c1ccccc1"
                    ),
                    (
                        "COc1ccc(-c2coc3ccc(-c4nnc(S)o4)cc23)cc1.COc1ccc(CCl)cc1F"
                        + ">>COc1ccc(-c2coc3ccc(-c4nnc(SCc5ccc(OC)c(F)c5)o4)cc23)cc1"
                    ),
                ],
                "ID": [1, 2, 3],
            }
        )
        self.smiles_column = "smiles"
        self.fp_type = "drfp"
        self.n_jobs = 2
        self.verbose = 0
        self.save_path = None

        # Instantiate the FPCalculator
        self.fp_calculator = FPCalculator(
            data=self.data,
            smiles_column=self.smiles_column,
            fp_type=self.fp_type,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            save_path=self.save_path,
        )

    def test_init_invalid_fp_type(self):
        """An unknown ``fp_type`` must be rejected with ``ValueError``."""
        with self.assertRaises(ValueError):
            FPCalculator(data=self.data, fp_type="invalid_type")

    def test_fit_missing_column(self):
        """A missing SMILES column must surface as ``ValueError``.

        Both construction and ``fit()`` sit inside the context manager, so the
        test passes whichever of the two performs the validation.
        """
        with self.assertRaises(ValueError):
            fp_calculator = FPCalculator(
                data=pd.DataFrame({"not_smiles": ["C"]}), smiles_column="smiles"
            )
            fp_calculator.fit()

    def test_constructor_and_attribute_assignment(self):
        """Constructor arguments are stored on the matching attributes."""
        self.assertEqual(self.fp_calculator.smiles_column, "smiles")
        self.assertEqual(self.fp_calculator.fp_type, "drfp")
        self.assertEqual(self.fp_calculator.n_jobs, 2)
        self.assertIsNone(self.fp_calculator.save_path)

    def test_calculate_drfp(self):
        """``calculate_drfp`` returns a NumPy array for one reaction SMILES."""
        smiles = "C1CCCCC1.CCO.CS(=O)(=O)N1CCN(Cc2ccccc2)CC1.[OH-].[OH-].[Pd+2]>>CS(=O)(=O)N1CCNCC1"
        fp = self.fp_calculator.calculate_drfp(smiles)
        # assertIsInstance instead of comparing type() objects: it accepts
        # ndarray subclasses and reports a clearer failure message.
        self.assertIsInstance(fp, np.ndarray)

    def test_parallel_calculate_drfp(self):
        """``fit`` (configured with ``n_jobs=2``) returns a DataFrame."""
        results = self.fp_calculator.fit()
        self.assertIsInstance(results, pd.DataFrame)
71
+
72
+
73
+ if __name__ == "__main__":
74
+ unittest.main()
@@ -0,0 +1,85 @@
1
+ import unittest
2
+ from rdkit import Chem, DataStructs
3
+ from rdkit.Chem import MACCSkeys
4
+ import numpy as np
5
+
6
+ from synkit.Chem.Fingerprint.smiles_featurizer import SmilesFeaturizer
7
+
8
+
9
class TestSmilesFeaturizer(unittest.TestCase):
    """Checks for the fingerprint helpers exposed by ``SmilesFeaturizer``."""

    def setUp(self):
        """Parse ethanol once so every test can reuse the same molecule."""
        self.valid_smiles = "CCO"  # Ethanol
        self.mol = SmilesFeaturizer.smiles_to_mol(self.valid_smiles)

    def test_smiles_to_mol_valid(self):
        """A valid SMILES string yields an RDKit ``Chem.Mol``."""
        ethanol = SmilesFeaturizer.smiles_to_mol("CCO")
        self.assertIsInstance(ethanol, Chem.Mol)

    def test_smiles_to_mol_invalid(self):
        """An invalid SMILES string is reported through ``ValueError``."""
        bad_smiles = "CC1"
        self.assertRaises(ValueError, SmilesFeaturizer.smiles_to_mol, bad_smiles)

    def test_get_maccs_keys(self):
        """The MACCS helper returns the same type RDKit's generator does."""
        produced = SmilesFeaturizer.get_maccs_keys(self.mol)
        reference_type = MACCSkeys.GenMACCSKeys(self.mol).__class__
        self.assertIsInstance(produced, reference_type)

    def test_get_avalon_fp(self):
        """Avalon fingerprints have 1024 bits by default and honour ``nBits``."""
        default_fp = SmilesFeaturizer.get_avalon_fp(self.mol)
        self.assertEqual(len(default_fp), 1024)

        short_fp = SmilesFeaturizer.get_avalon_fp(self.mol, nBits=512)
        self.assertEqual(len(short_fp), 512)

    def test_get_ecfp(self):
        """ECFP with radius 2 uses the default size of 2048 bits."""
        ecfp = SmilesFeaturizer.get_ecfp(self.mol, radius=2)
        self.assertEqual(len(ecfp), 2048)

    def test_get_rdk_fp(self):
        """RDKit fingerprints default to 2048 bits and accept a custom size."""
        default_fp = SmilesFeaturizer.get_rdk_fp(self.mol, maxPath=5)
        self.assertEqual(len(default_fp), 2048)

        resized_fp = SmilesFeaturizer.get_rdk_fp(
            self.mol, maxPath=5, fpSize=1024, nBitsPerHash=1
        )
        self.assertEqual(len(resized_fp), 1024)

    def test_mol_to_ap(self):
        """The atom-pair fingerprint converts to an array of 8388608 entries."""
        atom_pair_fp = SmilesFeaturizer.mol_to_ap(self.mol)
        # One-element placeholder handed to RDKit's converter; the assertion
        # below checks the array's length after conversion.
        target = np.zeros((1,), dtype=np.int8)
        DataStructs.ConvertToNumpyArray(atom_pair_fp, target)
        self.assertEqual(len(target), 8388608)

    def test_mol_to_pharm2d(self):
        """The 2D pharmacophore fingerprint is 39972 bits long."""
        pharm_fp = SmilesFeaturizer.mol_to_pharm2d(self.mol)
        # Decode the "0"/"1" bit string into numeric 0/1 values.
        raw_bytes = pharm_fp.ToBitString().encode()
        bits = np.frombuffer(raw_bytes, "u1") - ord("0")
        self.assertEqual(len(bits), 39972)

    def test_featurize_smiles(self):
        """``featurize_smiles`` returns an array unless conversion is disabled."""
        ethanol = "CCO"
        as_array = SmilesFeaturizer.featurize_smiles(ethanol, "maccs")
        self.assertIsInstance(as_array, np.ndarray)

        # With convert_to_array=False the raw fingerprint object is returned
        # (presumably an RDKit ExplicitBitVect); only "not an ndarray" is checked.
        as_bits = SmilesFeaturizer.featurize_smiles(
            ethanol, "maccs", convert_to_array=False
        )
        self.assertNotIsInstance(as_bits, np.ndarray)

    def test_error_on_unsupported_fingerprint_type(self):
        """An unknown fingerprint name raises ``ValueError``."""
        self.assertRaises(
            ValueError,
            SmilesFeaturizer.featurize_smiles,
            "CCO",
            "unsupported_fp_type",
        )
82
+
83
+
84
+ if __name__ == "__main__":
85
+ unittest.main()
@@ -0,0 +1,56 @@
1
+ import unittest
2
+ import numpy as np
3
+ from rdkit.DataStructs import cDataStructs
4
+
5
+ from synkit.Chem.Fingerprint.transformation_fp import TransformationFP
6
+
7
+
8
class TestTransformationFP(unittest.TestCase):
    """Checks for ``TransformationFP`` reaction-fingerprint generation."""

    def test_convert_arr2vec(self):
        """A NumPy 0/1 array becomes an ``ExplicitBitVect`` of equal length."""
        bits = np.array([1, 0, 1, 1, 0, 1])
        vector = TransformationFP.convert_arr2vec(bits)
        self.assertIsInstance(vector, cDataStructs.ExplicitBitVect)
        self.assertEqual(vector.GetNumBits(), len(bits))

    def test_fit(self):
        """``fit`` returns an array by default and a bit vector on request."""
        fit_args = ("CCO.CCN>>CCOC(C)N", ">>", "maccs", True)

        # Called without return_array: the result is expected to be an ndarray.
        as_array = TransformationFP.fit(*fit_args)
        self.assertIsInstance(as_array, np.ndarray)

        # Explicit return_array=False: the result is an RDKit bit vector.
        as_bitvect = TransformationFP.fit(*fit_args, return_array=False)
        self.assertIsInstance(as_bitvect, cDataStructs.ExplicitBitVect)

    def test_fit_invalid_smiles(self):
        """Unparseable SMILES on both sides makes ``fit`` raise an exception."""
        self.assertRaises(
            Exception,
            TransformationFP.fit,
            "invalid_smiles>>invalid_smiles",
            ">>",
            "maccs",
            True,
        )

    def test_fit_reaction_split(self):
        """A simple one-to-one reaction yields an array with ``abs_val`` off."""
        simple_reaction = "CCO>>CCN"
        separator = ">>"
        fingerprint_kind = "maccs"
        take_absolute = False  # without taking absolute values
        result = TransformationFP.fit(
            simple_reaction, separator, fingerprint_kind, take_absolute
        )
        self.assertIsInstance(result, np.ndarray)
53
+
54
+
55
+ if __name__ == "__main__":
56
+ unittest.main()
File without changes