synkit 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synkit-0.0.1/.github/workflows/publish-package.yml +49 -0
- synkit-0.0.1/.github/workflows/test-and-lint.yml +55 -0
- synkit-0.0.1/.gitignore +10 -0
- synkit-0.0.1/CHANGELOG.md +0 -0
- synkit-0.0.1/Data/Figure/synkit.png +0 -0
- synkit-0.0.1/Data/Testcase/mech.json.gz +1 -0
- synkit-0.0.1/LICENSE +21 -0
- synkit-0.0.1/PKG-INFO +148 -0
- synkit-0.0.1/README.md +119 -0
- synkit-0.0.1/Test/Chem/Fingerprint/__init__.py +0 -0
- synkit-0.0.1/Test/Chem/Fingerprint/test_fp_calculator.py +74 -0
- synkit-0.0.1/Test/Chem/Fingerprint/test_smiles_featurizer.py +85 -0
- synkit-0.0.1/Test/Chem/Fingerprint/test_transformation_fp.py +56 -0
- synkit-0.0.1/Test/Chem/Molecule/__init__.py +0 -0
- synkit-0.0.1/Test/Chem/Molecule/test_standardize.py +90 -0
- synkit-0.0.1/Test/Chem/Reaction/__init__.py +0 -0
- synkit-0.0.1/Test/Chem/Reaction/test_balance_checker.py +120 -0
- synkit-0.0.1/Test/Chem/Reaction/test_cleanning.py +26 -0
- synkit-0.0.1/Test/Chem/Reaction/test_deionize.py +52 -0
- synkit-0.0.1/Test/Chem/Reaction/test_neutralize.py +160 -0
- synkit-0.0.1/Test/Chem/Reaction/test_reagent.py +98 -0
- synkit-0.0.1/Test/Chem/Reaction/test_standardize.py +58 -0
- synkit-0.0.1/Test/Chem/Reaction/test_tautomerize.py +63 -0
- synkit-0.0.1/Test/Chem/__init__.py +0 -0
- synkit-0.0.1/Test/Graph/Cluster/__init__.py +0 -0
- synkit-0.0.1/Test/Graph/Feature/__init__.py +0 -0
- synkit-0.0.1/Test/Graph/Feature/test_graph_descriptors.py +195 -0
- synkit-0.0.1/Test/Graph/Feature/test_graph_fps.py +59 -0
- synkit-0.0.1/Test/Graph/Feature/test_graph_signature.py +101 -0
- synkit-0.0.1/Test/Graph/Feature/test_hash_fps.py +52 -0
- synkit-0.0.1/Test/Graph/Feature/test_morgan_fps.py +39 -0
- synkit-0.0.1/Test/Graph/Feature/test_path_fps.py +40 -0
- synkit-0.0.1/Test/Graph/__init__.py +0 -0
- synkit-0.0.1/Test/IO/__init__.py +0 -0
- synkit-0.0.1/Test/IO/test_chemical_conversion.py +143 -0
- synkit-0.0.1/Test/IO/test_gml_to_nx.py +132 -0
- synkit-0.0.1/Test/IO/test_graph_to_mol.py +59 -0
- synkit-0.0.1/Test/IO/test_mol_to_graph.py +49 -0
- synkit-0.0.1/Test/IO/test_nx_to_gml.py +101 -0
- synkit-0.0.1/Test/ITS/__init__.py +0 -0
- synkit-0.0.1/Test/ITS/test_aam_validator.py +97 -0
- synkit-0.0.1/Test/ITS/test_its_construction.py +51 -0
- synkit-0.0.1/Test/ITS/test_normalize_aam.py +63 -0
- synkit-0.0.1/Test/Reactor/__init__.py +0 -0
- synkit-0.0.1/Test/Reactor/test_core_engine.py +112 -0
- synkit-0.0.1/Test/Reactor/test_multi_step.py +50 -0
- synkit-0.0.1/Test/Reactor/test_multiple_step_aam.py +67 -0
- synkit-0.0.1/Test/Reactor/test_reagent.py +58 -0
- synkit-0.0.1/Test/Vis/__init__.py +0 -0
- synkit-0.0.1/Test/Vis/test_embedding.py +50 -0
- synkit-0.0.1/Test/__init__.py +0 -0
- synkit-0.0.1/lint.sh +6 -0
- synkit-0.0.1/pyproject.toml +37 -0
- synkit-0.0.1/pytest.sh +3 -0
- synkit-0.0.1/requirements.txt +13 -0
- synkit-0.0.1/synkit/Chem/Fingerprint/__init__.py +0 -0
- synkit-0.0.1/synkit/Chem/Fingerprint/fp_calculator.py +122 -0
- synkit-0.0.1/synkit/Chem/Fingerprint/smiles_featurizer.py +185 -0
- synkit-0.0.1/synkit/Chem/Fingerprint/transformation_fp.py +79 -0
- synkit-0.0.1/synkit/Chem/Molecule/__init__.py +0 -0
- synkit-0.0.1/synkit/Chem/Molecule/standardize.py +137 -0
- synkit-0.0.1/synkit/Chem/Reaction/__init__.py +0 -0
- synkit-0.0.1/synkit/Chem/Reaction/balance_check.py +162 -0
- synkit-0.0.1/synkit/Chem/Reaction/cleanning.py +59 -0
- synkit-0.0.1/synkit/Chem/Reaction/deionize.py +289 -0
- synkit-0.0.1/synkit/Chem/Reaction/neutralize.py +256 -0
- synkit-0.0.1/synkit/Chem/Reaction/reagent.py +102 -0
- synkit-0.0.1/synkit/Chem/Reaction/standardize.py +157 -0
- synkit-0.0.1/synkit/Chem/Reaction/tautomerize.py +168 -0
- synkit-0.0.1/synkit/Graph/Cluster/__init__.py +0 -0
- synkit-0.0.1/synkit/Graph/Cluster/morphism.py +83 -0
- synkit-0.0.1/synkit/Graph/Feature/__init__.py +0 -0
- synkit-0.0.1/synkit/Graph/Feature/graph_descriptors.py +325 -0
- synkit-0.0.1/synkit/Graph/Feature/graph_fps.py +97 -0
- synkit-0.0.1/synkit/Graph/Feature/graph_signature.py +236 -0
- synkit-0.0.1/synkit/Graph/Feature/hash_fps.py +130 -0
- synkit-0.0.1/synkit/Graph/Feature/morgan_fps.py +87 -0
- synkit-0.0.1/synkit/Graph/Feature/path_fps.py +82 -0
- synkit-0.0.1/synkit/Graph/__init.py +0 -0
- synkit-0.0.1/synkit/IO/__init__.py +0 -0
- synkit-0.0.1/synkit/IO/chem_converter.py +231 -0
- synkit-0.0.1/synkit/IO/data_io.py +277 -0
- synkit-0.0.1/synkit/IO/data_process.py +49 -0
- synkit-0.0.1/synkit/IO/debug.py +78 -0
- synkit-0.0.1/synkit/IO/dg_to_gml.py +124 -0
- synkit-0.0.1/synkit/IO/gml_to_nx.py +119 -0
- synkit-0.0.1/synkit/IO/graph_to_mol.py +110 -0
- synkit-0.0.1/synkit/IO/mol_to_graph.py +282 -0
- synkit-0.0.1/synkit/IO/nx_to_gml.py +200 -0
- synkit-0.0.1/synkit/IO/parse_rule.py +172 -0
- synkit-0.0.1/synkit/IO/smiles_to_id.py +119 -0
- synkit-0.0.1/synkit/ITS/_misc.py +280 -0
- synkit-0.0.1/synkit/ITS/aam_validator.py +254 -0
- synkit-0.0.1/synkit/ITS/its_builder.py +94 -0
- synkit-0.0.1/synkit/ITS/its_construction.py +213 -0
- synkit-0.0.1/synkit/ITS/normalize_aam.py +183 -0
- synkit-0.0.1/synkit/ITS/partial_expand.py +170 -0
- synkit-0.0.1/synkit/Reactor/__init__.py +0 -0
- synkit-0.0.1/synkit/Reactor/core_engine.py +164 -0
- synkit-0.0.1/synkit/Reactor/inference.py +73 -0
- synkit-0.0.1/synkit/Reactor/multi_step.py +227 -0
- synkit-0.0.1/synkit/Reactor/multi_step_aam.py +82 -0
- synkit-0.0.1/synkit/Reactor/reagent.py +95 -0
- synkit-0.0.1/synkit/Reactor/rule_apply.py +81 -0
- synkit-0.0.1/synkit/Vis/__init__.py +0 -0
- synkit-0.0.1/synkit/Vis/chemical_graph_visualizer.py +378 -0
- synkit-0.0.1/synkit/Vis/chemical_reaction_visualizer.py +133 -0
- synkit-0.0.1/synkit/Vis/chemical_space.py +83 -0
- synkit-0.0.1/synkit/Vis/embedding.py +92 -0
- synkit-0.0.1/synkit/Vis/graph_visualizer.py +286 -0
- synkit-0.0.1/synkit/Vis/pdf_writer.py +143 -0
- synkit-0.0.1/synkit/Vis/rsmi_to_fig.py +169 -0
- synkit-0.0.1/synkit/__init__.py +0 -0
- synkit-0.0.1/synkit/_misc.py +181 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
name: PyPI publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
build:
|
|
12
|
+
name: Build package
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- name: Checkout
|
|
16
|
+
uses: actions/checkout@v3
|
|
17
|
+
- name: Set up Python
|
|
18
|
+
uses: actions/setup-python@v3
|
|
19
|
+
with:
|
|
20
|
+
python-version: '3.x'
|
|
21
|
+
- name: Install dependencies
|
|
22
|
+
run: |
|
|
23
|
+
python -m pip install --upgrade pip
|
|
24
|
+
pip install build
|
|
25
|
+
- name: Build package
|
|
26
|
+
run: python -m build
|
|
27
|
+
- name: Upload artifact
|
|
28
|
+
uses: actions/upload-artifact@v4
|
|
29
|
+
with:
|
|
30
|
+
name: package
|
|
31
|
+
path: dist/
|
|
32
|
+
|
|
33
|
+
publish:
|
|
34
|
+
name: Upload release to PyPI
|
|
35
|
+
needs: build
|
|
36
|
+
runs-on: ubuntu-latest
|
|
37
|
+
environment:
|
|
38
|
+
name: pypi
|
|
39
|
+
url: https://pypi.org/p/synkit
|
|
40
|
+
permissions:
|
|
41
|
+
id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
|
|
42
|
+
steps:
|
|
43
|
+
- name: Download artifact
|
|
44
|
+
uses: actions/download-artifact@v4
|
|
45
|
+
with:
|
|
46
|
+
name: package
|
|
47
|
+
path: dist/
|
|
48
|
+
- name: Publish package to PyPI
|
|
49
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# This workflow will install Python dependencies, run tests and lint with a
|
|
2
|
+
# single version of Python
|
|
3
|
+
# For more information see:
|
|
4
|
+
# https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
|
|
5
|
+
|
|
6
|
+
name: Test & Lint
|
|
7
|
+
|
|
8
|
+
on:
|
|
9
|
+
push:
|
|
10
|
+
branches: [ "main", "dev", "maintain" ]
|
|
11
|
+
pull_request:
|
|
12
|
+
branches: [ "main" ]
|
|
13
|
+
|
|
14
|
+
permissions:
|
|
15
|
+
contents: read
|
|
16
|
+
|
|
17
|
+
jobs:
|
|
18
|
+
build:
|
|
19
|
+
|
|
20
|
+
runs-on: ubuntu-latest
|
|
21
|
+
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v3
|
|
24
|
+
|
|
25
|
+
# Set up Miniconda
|
|
26
|
+
- name: Set up Miniconda
|
|
27
|
+
uses: conda-incubator/setup-miniconda@v2
|
|
28
|
+
with:
|
|
29
|
+
auto-update-conda: true
|
|
30
|
+
python-version: 3.11
|
|
31
|
+
|
|
32
|
+
# Create and activate Conda environment
|
|
33
|
+
- name: Create and activate environment
|
|
34
|
+
run: |
|
|
35
|
+
conda create --name synkit-env python=3.11 -y
|
|
36
|
+
conda activate synkit-env
|
|
37
|
+
conda install -c jakobandersen -c conda-forge mod
|
|
38
|
+
pip install flake8 pytest
|
|
39
|
+
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
|
40
|
+
shell: bash -l {0}
|
|
41
|
+
|
|
42
|
+
# Lint with flake8
|
|
43
|
+
- name: Lint with flake8
|
|
44
|
+
run: |
|
|
45
|
+
conda activate synkit-env
|
|
46
|
+
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
|
47
|
+
./lint.sh
|
|
48
|
+
shell: bash -l {0}
|
|
49
|
+
|
|
50
|
+
# Test with pytest
|
|
51
|
+
- name: Test with pytest
|
|
52
|
+
run: |
|
|
53
|
+
conda activate synkit-env
|
|
54
|
+
./pytest.sh
|
|
55
|
+
shell: bash -l {0}
|
synkit-0.0.1/.gitignore
ADDED
|
File without changes
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
[{"R-id": "Mech-1", "reaction": "[CH3:1][CH:2]=[O:3].[CH:4]([H:7])([H:8])[CH:5]=[O:6]>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[O:3]([H:7])([H:8])", "mechanisms": [{"mech_name": "Aldol reaction (base cat)", "steps": [{"description": "Base abstracts H from substrate", "smart_string": "[CH:4]([H:7])([H:8])[CH:5]=[O:6].[*-:9]>>[CH-:4]([H:8])[CH:5]=[O:6].[*:9][H:7]", "step": 1, "step_gml": "rule [\n ruleID \"2\"\n left [\n ]\n context [\n ]\n right [\n ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Base abstracts H from substrate"}, {"description": "Nucleophilic addition fro substrate", "smart_string": "[CH3:1][CH:2]=[O:3].[CH-:4]([H:8])[CH:5]=[O:6]>>[CH3:1][CH:2]([O-:3])[CH:4]([H:8])[CH:5]=[O:6]", "step": 2, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"=\" ]\n node [ id 2 label \"O\" ]\n node [ id 3 label \"C-\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n ]\n right [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 1 target 3 label \"-\" ]\n node [ id 2 label \"O-\" ]\n node [ id 3 label \"C\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH2:1]=[O:2].[CH3-:3]>>[CH2:1]([O-:2])[CH3:3]", "step_desc": "Nucleophilic addition fro substrate"}, {"description": "Neutralize substrate", "smart_string": "[CH3:1][CH:2]([O-:3])[CH:4]([H:8])[CH:5]=[O:6].[*:9][H:7]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[*-:9]", "step": 3, "step_gml": "rule [\n ruleID \"2\"\n left [\n ]\n context [\n ]\n right [\n ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Neutralize substrate"}, {"description": "Base abstracts H from substrate", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[*-:9]>>[CH3:1][CH:2]([O:3][H:7])[CH-:4][CH:5]=[O:6].[*:9][H:8]", "step": 4, "step_gml": "rule [\n ruleID \"2\"\n left [\n ]\n context [\n ]\n right [\n ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Base abstracts H from substrate"}, {"description": "Elimination Unimolecular Conjugate Base", "smart_string": 
"[CH3:1][CH:2]([O:3][H:7])[CH-:4][CH:5]=[O:6]>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[O-:3][H:7]", "step": 5, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 1 target 3 label \"-\" ]\n node [ id 2 label \"O\" ]\n node [ id 3 label \"C-\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n ]\n right [\n edge [ source 1 target 3 label \"=\" ]\n node [ id 2 label \"O-\" ]\n node [ id 3 label \"C\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH2:1]([OH:2])[CH2-:3]>>[CH2:1]=[CH2:3].[OH-:2]", "step_desc": "Elimination Unimolecular Conjugate Base"}, {"description": "Neutralize Hydroxide", "smart_string": "[O-:3][H:7].[*:9][H:8]>>[O:3]([H:7])([H:8]).[*-:9]", "step": 6, "step_gml": "rule [\n ruleID \"2\"\n left [\n ]\n context [\n ]\n right [\n ]\n]", "step_dfs": null, "step_smart": ">>", "step_desc": "Neutralize Hydroxide"}], "cat": "[*-]"}, {"mech_name": "Aldol reaction (neutral cat)", "steps": [{"description": "Tautomerization of substrate", "smart_string": "[CH:4]([H:7])([H:8])[CH:5]=[O:6]>>[CH:4]([H:8])=[CH:5][O:6]([H:7])", "step": 1, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 2 target 3 label \"=\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n node [ id 2 label \"C\" ]\n node [ id 3 label \"O\" ]\n ]\n right [\n edge [ source 1 target 2 label \"=\" ]\n edge [ source 2 target 3 label \"-\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH3:1][CH:2]=[O:3]>>[CH2:1]=[CH:2][OH:3]", "step_desc": "Tautomerization of substrate"}, {"description": "Nucleophilic addition from substrate", "smart_string": "[CH3:1][CH:2]=[O:3].[CH:4]([H:8])=[CH:5][O:6]([H:7])>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6]", "step": 2, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"=\" ]\n edge [ source 3 target 4 label \"=\" ]\n edge [ source 4 target 5 label \"-\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n node [ id 2 label \"O\" ]\n node [ id 3 label 
\"C\" ]\n node [ id 4 label \"C\" ]\n node [ id 5 label \"O\" ]\n ]\n right [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 1 target 3 label \"-\" ]\n edge [ source 3 target 4 label \"-\" ]\n edge [ source 4 target 5 label \"=\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH2:1]=[O:2].[CH2:3]=[CH:4][OH:5]>>[CH2:1]([OH:2])[CH2:3][CH:4]=[O:5]", "step_desc": "Nucleophilic addition from substrate"}, {"description": "Tautomerization of substrate", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:8])", "step": 3, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 2 target 3 label \"=\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n node [ id 2 label \"C\" ]\n node [ id 3 label \"O\" ]\n ]\n right [\n edge [ source 1 target 2 label \"=\" ]\n edge [ source 2 target 3 label \"-\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH3:1][CH:2]=[O:3]>>[CH2:1]=[CH:2][OH:3]", "step_desc": "Tautomerization of substrate"}, {"description": "Elimination", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:8])>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[O:3]([H:7])([H:8])", "step": 4, "step_gml": "rule [\n ruleID \"2\"\n left [\n edge [ source 1 target 2 label \"-\" ]\n edge [ source 1 target 3 label \"-\" ]\n edge [ source 3 target 4 label \"=\" ]\n edge [ source 4 target 5 label \"-\" ]\n ]\n context [\n node [ id 1 label \"C\" ]\n node [ id 2 label \"O\" ]\n node [ id 3 label \"C\" ]\n node [ id 4 label \"C\" ]\n node [ id 5 label \"O\" ]\n ]\n right [\n edge [ source 1 target 3 label \"=\" ]\n edge [ source 3 target 4 label \"-\" ]\n edge [ source 4 target 5 label \"=\" ]\n ]\n]", "step_dfs": null, "step_smart": "[CH2:1]([OH:2])[CH:3]=[CH:4][OH:5]>>[CH2:1]=[CH:3][CH:4]=[O:5].[OH2:2]", "step_desc": "Elimination"}], "cat": ""}, {"mech_name": "Aldol reaction (acid cat)", "steps": [{"description": "Tautomerization of substrate with acid cat", 
"smart_string": "[CH:4]([H:7])([H:8])[CH:5]=[O:6].[H+:9]>>[CH:4]([H:8])=[CH:5][O:6]([H:9]).[H+:7]", "step": 1}, {"description": "Nucleophilic addition from substrate", "smart_string": "[CH3:1][CH:2]=[O:3].[CH:4]([H:8])=[CH:5][O:6]([H:9]).[H+:7]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[H+:9]", "step": 2}, {"description": "Tautomerization of substrate with acid cat", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]([H:8])[CH:5]=[O:6].[H+:9]>>[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:9]).[H+:8]", "step": 3}, {"description": "Elimination", "smart_string": "[CH3:1][CH:2]([O:3][H:7])[CH:4]=[CH:5][O:6]([H:9]).[H+:8]>>[CH3:1][CH:2]=[CH:4][CH:5]=[O:6].[H+:9].[O:3]([H:7])([H:8])", "step": 4}], "cat": "[H+]"}]}]
|
synkit-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Tieu Long Phan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
synkit-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: synkit
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Utility for reaction modeling using graph grammar
|
|
5
|
+
Project-URL: homepage, https://github.com/TieuLongPhan/SynKit
|
|
6
|
+
Project-URL: source, https://github.com/TieuLongPhan/SynKit
|
|
7
|
+
Project-URL: issues, https://github.com/TieuLongPhan/SynKit/issues
|
|
8
|
+
Project-URL: documentation, https://tieulongphan.github.io/SynKit/
|
|
9
|
+
Author-email: Tieu Long Phan <tieu@bioinf.uni-leipzig.de>
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Requires-Python: >=3.11
|
|
15
|
+
Requires-Dist: networkx>=3.3
|
|
16
|
+
Requires-Dist: pandas>=1.5.3
|
|
17
|
+
Requires-Dist: rdkit>=2024.3.3
|
|
18
|
+
Requires-Dist: requests>=2.32.3
|
|
19
|
+
Requires-Dist: scikit-learn>=1.4.0
|
|
20
|
+
Requires-Dist: seaborn>=0.13.2
|
|
21
|
+
Provides-Extra: all
|
|
22
|
+
Requires-Dist: drfp==0.3.6; extra == 'all'
|
|
23
|
+
Requires-Dist: fgutils>=0.1.3; extra == 'all'
|
|
24
|
+
Requires-Dist: rxn-chem-utils==1.5.0; extra == 'all'
|
|
25
|
+
Requires-Dist: rxn-utils==2.0.0; extra == 'all'
|
|
26
|
+
Requires-Dist: rxnmapper==0.3.0; extra == 'all'
|
|
27
|
+
Requires-Dist: xgboost>=2.1.1; extra == 'all'
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# SynKit
|
|
31
|
+
|
|
32
|
+
**Toolkit for Synthesis Planning**
|
|
33
|
+
|
|
34
|
+
SynKit is a collection of tools designed to support the planning and execution of chemical synthesis.
|
|
35
|
+
|
|
36
|
+

|
|
37
|
+
|
|
38
|
+
Our tools are tailored to assist researchers and chemists in navigating complex chemical reactions and synthesis pathways, leveraging the power of modern computational chemistry. Whether you're designing novel compounds or optimizing existing processes, ``synkit`` aims to provide the critical tools you need.
|
|
39
|
+
|
|
40
|
+
For more details on each utility within the repository, please refer to the documentation provided in the respective folders.
|
|
41
|
+
|
|
42
|
+
## Step-by-Step Installation Guide
|
|
43
|
+
|
|
44
|
+
1. **Python Installation:**
|
|
45
|
+
Ensure that Python 3.11 or later is installed on your system. You can download it from [python.org](https://www.python.org/downloads/).
|
|
46
|
+
|
|
47
|
+
2. **Creating a Virtual Environment (Optional but Recommended):**
|
|
48
|
+
It's recommended to use a virtual environment to avoid conflicts with other projects or system-wide packages. Use the following commands to create and activate a virtual environment:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
python -m venv synkit-env
|
|
52
|
+
source synkit-env/bin/activate
|
|
53
|
+
```
|
|
54
|
+
Or Conda
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
conda create --name synkit-env python=3.11
|
|
58
|
+
conda activate synkit-env
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
3. **Install from PyPI:**
|
|
62
|
+
The easiest way to use SynKit is by installing the PyPI package
|
|
63
|
+
[synkit](https://pypi.org/project/synkit/).
|
|
64
|
+
|
|
65
|
+
```
|
|
66
|
+
pip install synkit
|
|
67
|
+
```
|
|
68
|
+
Optionally, to install the full version with all extras:
|
|
69
|
+
```
|
|
70
|
+
pip install synkit[all]
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## For contributors
|
|
74
|
+
|
|
75
|
+
We welcome new contributors to help make this project better. Please do not hesitate to contact me via [email](mailto:tieu@bioinf.uni-leipzig.de).
|
|
76
|
+
|
|
77
|
+
Before you start, ensure your local development environment is set up correctly. Pull the latest version of the `main` branch to start with the most recent stable code.
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
git checkout main
|
|
81
|
+
git pull
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## Working on New Features
|
|
85
|
+
|
|
86
|
+
1. **Create a New Branch**:
|
|
87
|
+
For every new feature or bug fix, create a new branch from the `main` branch. Name your branch meaningfully, related to the feature or fix you are working on.
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
git checkout -b feature/your-feature-name
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
2. **Develop and Commit Changes**:
|
|
94
|
+
Make your changes locally, commit them to your branch. Keep your commits small and focused; each should represent a logical unit of work.
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
git commit -m "Describe the change"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
3. **Run Quality Checks**:
|
|
101
|
+
Before finalizing your feature, run the following commands to ensure your code meets our formatting standards and passes all tests:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
./lint.sh # Check code format
|
|
105
|
+
pytest Test # Run tests
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Fix any issues or errors highlighted by these checks.
|
|
109
|
+
|
|
110
|
+
## Integrating Changes
|
|
111
|
+
|
|
112
|
+
1. **Rebase onto Staging**:
|
|
113
|
+
Once your feature is complete and tests pass, rebase your changes onto the `staging` branch to prepare for integration.
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
git fetch origin
|
|
117
|
+
git rebase origin/staging
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Carefully resolve any conflicts that arise during the rebase.
|
|
121
|
+
|
|
122
|
+
2. **Push to Your Feature Branch**:
|
|
123
|
+
After successfully rebasing, push your branch to the remote repository.
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
git push origin feature/your-feature-name
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
3. **Create a Pull Request**:
|
|
130
|
+
Open a pull request from your feature branch to the `staging` branch. Ensure the pull request description clearly describes the changes and any additional context necessary for review.
|
|
131
|
+
|
|
132
|
+
## Contributing
|
|
133
|
+
- [Tieu-Long Phan](https://tieulongphan.github.io/)
|
|
134
|
+
- [Klaus Weinbauer](https://github.com/klausweinbauer)
|
|
135
|
+
- [Phuoc-Chung Nguyen Van](https://github.com/phuocchung123)
|
|
136
|
+
|
|
137
|
+
## Deployment timeline
|
|
138
|
+
|
|
139
|
+
We plan to release a new version quarterly.
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
## License
|
|
143
|
+
|
|
144
|
+
This project is licensed under the MIT License - see the [License](LICENSE) file for details.
|
|
145
|
+
|
|
146
|
+
## Acknowledgments
|
|
147
|
+
|
|
148
|
+
This project has received funding from the European Union's Horizon Europe Doctoral Network programme under the Marie-Skłodowska-Curie grant agreement No 101072930 ([TACsy](https://tacsy.eu/) -- Training Alliance for Computational Systems Chemistry).
|
synkit-0.0.1/README.md
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# SynKit
|
|
2
|
+
|
|
3
|
+
**Toolkit for Synthesis Planning**
|
|
4
|
+
|
|
5
|
+
SynKit is a collection of tools designed to support the planning and execution of chemical synthesis.
|
|
6
|
+
|
|
7
|
+

|
|
8
|
+
|
|
9
|
+
Our tools are tailored to assist researchers and chemists in navigating complex chemical reactions and synthesis pathways, leveraging the power of modern computational chemistry. Whether you're designing novel compounds or optimizing existing processes, ``synkit`` aims to provide the critical tools you need.
|
|
10
|
+
|
|
11
|
+
For more details on each utility within the repository, please refer to the documentation provided in the respective folders.
|
|
12
|
+
|
|
13
|
+
## Step-by-Step Installation Guide
|
|
14
|
+
|
|
15
|
+
1. **Python Installation:**
|
|
16
|
+
Ensure that Python 3.11 or later is installed on your system. You can download it from [python.org](https://www.python.org/downloads/).
|
|
17
|
+
|
|
18
|
+
2. **Creating a Virtual Environment (Optional but Recommended):**
|
|
19
|
+
It's recommended to use a virtual environment to avoid conflicts with other projects or system-wide packages. Use the following commands to create and activate a virtual environment:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
python -m venv synkit-env
|
|
23
|
+
source synkit-env/bin/activate
|
|
24
|
+
```
|
|
25
|
+
Or Conda
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
conda create --name synkit-env python=3.11
|
|
29
|
+
conda activate synkit-env
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
3. **Install from PyPI:**
|
|
33
|
+
The easiest way to use SynKit is by installing the PyPI package
|
|
34
|
+
[synkit](https://pypi.org/project/synkit/).
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
pip install synkit
|
|
38
|
+
```
|
|
39
|
+
Optionally, to install the full version with all extras:
|
|
40
|
+
```
|
|
41
|
+
pip install synkit[all]
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## For contributors
|
|
45
|
+
|
|
46
|
+
We welcome new contributors to help make this project better. Please do not hesitate to contact me via [email](mailto:tieu@bioinf.uni-leipzig.de).
|
|
47
|
+
|
|
48
|
+
Before you start, ensure your local development environment is set up correctly. Pull the latest version of the `main` branch to start with the most recent stable code.
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
git checkout main
|
|
52
|
+
git pull
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Working on New Features
|
|
56
|
+
|
|
57
|
+
1. **Create a New Branch**:
|
|
58
|
+
For every new feature or bug fix, create a new branch from the `main` branch. Name your branch meaningfully, related to the feature or fix you are working on.
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
git checkout -b feature/your-feature-name
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
2. **Develop and Commit Changes**:
|
|
65
|
+
Make your changes locally, commit them to your branch. Keep your commits small and focused; each should represent a logical unit of work.
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
git commit -m "Describe the change"
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
3. **Run Quality Checks**:
|
|
72
|
+
Before finalizing your feature, run the following commands to ensure your code meets our formatting standards and passes all tests:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
./lint.sh # Check code format
|
|
76
|
+
pytest Test # Run tests
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Fix any issues or errors highlighted by these checks.
|
|
80
|
+
|
|
81
|
+
## Integrating Changes
|
|
82
|
+
|
|
83
|
+
1. **Rebase onto Staging**:
|
|
84
|
+
Once your feature is complete and tests pass, rebase your changes onto the `staging` branch to prepare for integration.
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
git fetch origin
|
|
88
|
+
git rebase origin/staging
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Carefully resolve any conflicts that arise during the rebase.
|
|
92
|
+
|
|
93
|
+
2. **Push to Your Feature Branch**:
|
|
94
|
+
After successfully rebasing, push your branch to the remote repository.
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
git push origin feature/your-feature-name
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
3. **Create a Pull Request**:
|
|
101
|
+
Open a pull request from your feature branch to the `staging` branch. Ensure the pull request description clearly describes the changes and any additional context necessary for review.
|
|
102
|
+
|
|
103
|
+
## Contributing
|
|
104
|
+
- [Tieu-Long Phan](https://tieulongphan.github.io/)
|
|
105
|
+
- [Klaus Weinbauer](https://github.com/klausweinbauer)
|
|
106
|
+
- [Phuoc-Chung Nguyen Van](https://github.com/phuocchung123)
|
|
107
|
+
|
|
108
|
+
## Deployment timeline
|
|
109
|
+
|
|
110
|
+
We plan to release a new version quarterly.
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
## License
|
|
114
|
+
|
|
115
|
+
This project is licensed under the MIT License - see the [License](LICENSE) file for details.
|
|
116
|
+
|
|
117
|
+
## Acknowledgments
|
|
118
|
+
|
|
119
|
+
This project has received funding from the European Union's Horizon Europe Doctoral Network programme under the Marie-Skłodowska-Curie grant agreement No 101072930 ([TACsy](https://tacsy.eu/) -- Training Alliance for Computational Systems Chemistry).
|
|
File without changes
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from synkit.Chem.Fingerprint.fp_calculator import FPCalculator
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TestFPCalculator(unittest.TestCase):
|
|
9
|
+
def setUp(self):
|
|
10
|
+
# Sample data setup
|
|
11
|
+
self.data = pd.DataFrame(
|
|
12
|
+
{
|
|
13
|
+
"smiles": [
|
|
14
|
+
(
|
|
15
|
+
"C1CCCCC1.CCO.CS(=O)(=O)N1CCN(Cc2ccccc2)CC1.[OH-].[OH-].[Pd+2]"
|
|
16
|
+
+ ">>CS(=O)(=O)N1CCNCC1"
|
|
17
|
+
),
|
|
18
|
+
(
|
|
19
|
+
"CCOC(C)=O.Cc1cc([N+](=O)[O-])ccc1NC(=O)c1ccccc1.Cl[Sn]Cl.O.O.O=C([O-])O.[Na+]"
|
|
20
|
+
+ ">>Cc1cc(N)ccc1NC(=O)c1ccccc1"
|
|
21
|
+
),
|
|
22
|
+
(
|
|
23
|
+
"COc1ccc(-c2coc3ccc(-c4nnc(S)o4)cc23)cc1.COc1ccc(CCl)cc1F"
|
|
24
|
+
+ ">>COc1ccc(-c2coc3ccc(-c4nnc(SCc5ccc(OC)c(F)c5)o4)cc23)cc1"
|
|
25
|
+
),
|
|
26
|
+
],
|
|
27
|
+
"ID": [1, 2, 3],
|
|
28
|
+
}
|
|
29
|
+
)
|
|
30
|
+
self.smiles_column = "smiles"
|
|
31
|
+
self.fp_type = "drfp"
|
|
32
|
+
self.n_jobs = 2
|
|
33
|
+
self.verbose = 0
|
|
34
|
+
self.save_path = None
|
|
35
|
+
|
|
36
|
+
# Instantiate the FPCalculator
|
|
37
|
+
self.fp_calculator = FPCalculator(
|
|
38
|
+
data=self.data,
|
|
39
|
+
smiles_column=self.smiles_column,
|
|
40
|
+
fp_type=self.fp_type,
|
|
41
|
+
n_jobs=self.n_jobs,
|
|
42
|
+
verbose=self.verbose,
|
|
43
|
+
save_path=self.save_path,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
def test_init_invalid_fp_type(self):
|
|
47
|
+
with self.assertRaises(ValueError):
|
|
48
|
+
FPCalculator(data=self.data, fp_type="invalid_type")
|
|
49
|
+
|
|
50
|
+
def test_fit_missing_column(self):
|
|
51
|
+
with self.assertRaises(ValueError):
|
|
52
|
+
fp_calculator = FPCalculator(
|
|
53
|
+
data=pd.DataFrame({"not_smiles": ["C"]}), smiles_column="smiles"
|
|
54
|
+
)
|
|
55
|
+
fp_calculator.fit()
|
|
56
|
+
|
|
57
|
+
def test_constructor_and_attribute_assignment(self):
|
|
58
|
+
self.assertEqual(self.fp_calculator.smiles_column, "smiles")
|
|
59
|
+
self.assertEqual(self.fp_calculator.fp_type, "drfp")
|
|
60
|
+
self.assertEqual(self.fp_calculator.n_jobs, 2)
|
|
61
|
+
self.assertIsNone(self.fp_calculator.save_path)
|
|
62
|
+
|
|
63
|
+
def test_calculate_drfp(self):
|
|
64
|
+
smiles = "C1CCCCC1.CCO.CS(=O)(=O)N1CCN(Cc2ccccc2)CC1.[OH-].[OH-].[Pd+2]>>CS(=O)(=O)N1CCNCC1"
|
|
65
|
+
fp = self.fp_calculator.calculate_drfp(smiles)
|
|
66
|
+
self.assertEqual(type(fp), np.ndarray)
|
|
67
|
+
|
|
68
|
+
def test_parallel_calculate_drfp(self):
    """``fit()`` on the ``setUp`` calculator (n_jobs=2) returns a DataFrame."""
    fitted = self.fp_calculator.fit()
    # isinstance-based check instead of comparing type(...) for equality:
    # it also accepts DataFrame subclasses and gives a clearer failure message.
    self.assertIsInstance(fitted, pd.DataFrame)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
if __name__ == "__main__":
|
|
74
|
+
unittest.main()
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
from rdkit import Chem, DataStructs
|
|
3
|
+
from rdkit.Chem import MACCSkeys
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from synkit.Chem.Fingerprint.smiles_featurizer import SmilesFeaturizer
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TestSmilesFeaturizer(unittest.TestCase):
    """Unit tests for the ``SmilesFeaturizer`` fingerprint helpers."""

    def setUp(self):
        """Parse one valid SMILES so the fingerprint tests share a molecule."""
        self.valid_smiles = "CCO"  # Ethanol
        self.mol = SmilesFeaturizer.smiles_to_mol(self.valid_smiles)

    def test_smiles_to_mol_valid(self):
        """A valid SMILES string is converted to an RDKit ``Mol`` object."""
        parsed = SmilesFeaturizer.smiles_to_mol("CCO")  # Ethanol
        self.assertIsInstance(parsed, Chem.Mol)

    def test_smiles_to_mol_invalid(self):
        """An invalid SMILES string raises ``ValueError``."""
        # "CC1" opens ring bond 1 but never closes it.
        unclosed_ring = "CC1"
        self.assertRaises(
            ValueError, SmilesFeaturizer.smiles_to_mol, unclosed_ring
        )

    def test_get_maccs_keys(self):
        """``get_maccs_keys`` yields the same type RDKit's MACCS generator does."""
        fingerprint = SmilesFeaturizer.get_maccs_keys(self.mol)
        # Derive the expected class from RDKit itself rather than naming it.
        reference_type = type(MACCSkeys.GenMACCSKeys(self.mol))
        self.assertIsInstance(fingerprint, reference_type)

    def test_get_avalon_fp(self):
        """Avalon fingerprints have 1024 bits by default and honour ``nBits``."""
        default_fp = SmilesFeaturizer.get_avalon_fp(self.mol)
        self.assertEqual(len(default_fp), 1024)

        resized_fp = SmilesFeaturizer.get_avalon_fp(self.mol, nBits=512)
        self.assertEqual(len(resized_fp), 512)

    def test_get_ecfp(self):
        """ECFP with radius 2 has the default length of 2048 bits."""
        ecfp = SmilesFeaturizer.get_ecfp(self.mol, radius=2)
        self.assertEqual(len(ecfp), 2048)

    def test_get_rdk_fp(self):
        """RDKit fingerprints default to 2048 bits and honour ``fpSize``."""
        default_fp = SmilesFeaturizer.get_rdk_fp(self.mol, maxPath=5)
        self.assertEqual(len(default_fp), 2048)

        resized_fp = SmilesFeaturizer.get_rdk_fp(
            self.mol,
            maxPath=5,
            fpSize=1024,
            nBitsPerHash=1,
        )
        self.assertEqual(len(resized_fp), 1024)

    def test_mol_to_ap(self):
        """The atom-pair fingerprint converts to an array of 8388608 entries."""
        fingerprint = SmilesFeaturizer.mol_to_ap(self.mol)
        # One-element placeholder; the length assertion below expects
        # ConvertToNumpyArray to resize it to the fingerprint length.
        target = np.zeros((1,), dtype=np.int8)
        DataStructs.ConvertToNumpyArray(fingerprint, target)
        self.assertEqual(len(target), 8388608)  # 2 ** 23

    def test_mol_to_pharm2d(self):
        """The 2D pharmacophore fingerprint has 39972 bits."""
        fingerprint = SmilesFeaturizer.mol_to_pharm2d(self.mol)
        # Decode the bit string: each byte's ASCII code minus ord("0").
        ascii_codes = np.frombuffer(fingerprint.ToBitString().encode(), "u1")
        bits = ascii_codes - ord("0")
        self.assertEqual(len(bits), 39972)

    def test_featurize_smiles(self):
        """``featurize_smiles`` returns an array unless conversion is disabled."""
        ethanol = "CCO"

        as_array = SmilesFeaturizer.featurize_smiles(ethanol, "maccs")
        self.assertIsInstance(as_array, np.ndarray)

        # With conversion off the raw fingerprint object is returned
        # (should be an RDKit ExplicitBitVect); only "not an ndarray" is checked.
        raw_fp = SmilesFeaturizer.featurize_smiles(
            ethanol, "maccs", convert_to_array=False
        )
        self.assertNotIsInstance(raw_fp, np.ndarray)

    def test_error_on_unsupported_fingerprint_type(self):
        """An unsupported fingerprint type name raises ``ValueError``."""
        self.assertRaises(
            ValueError,
            SmilesFeaturizer.featurize_smiles,
            "CCO",
            "unsupported_fp_type",
        )
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
if __name__ == "__main__":
|
|
85
|
+
unittest.main()
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
import numpy as np
|
|
3
|
+
from rdkit.DataStructs import cDataStructs
|
|
4
|
+
|
|
5
|
+
from synkit.Chem.Fingerprint.transformation_fp import TransformationFP
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TestTransformationFP(unittest.TestCase):
    """Unit tests for ``TransformationFP`` reaction fingerprints."""

    # Values shared by every ``fit`` call in this class.
    SEPARATOR = ">>"
    FP_TYPE = "maccs"

    def test_convert_arr2vec(self):
        """A NumPy 0/1 array converts to an ExplicitBitVect of equal length."""
        bits = np.array([1, 0, 1, 1, 0, 1])
        vector = TransformationFP.convert_arr2vec(bits)
        self.assertIsInstance(vector, cDataStructs.ExplicitBitVect)
        self.assertEqual(vector.GetNumBits(), len(bits))

    def test_fit(self):
        """``fit`` returns an array by default and a bit vector on request."""
        # Positional order: reaction SMILES, separator, fp type, abs_val.
        fit_args = ("CCO.CCN>>CCOC(C)N", self.SEPARATOR, self.FP_TYPE, True)

        # No return_array argument: the default is expected to give an ndarray.
        as_array = TransformationFP.fit(*fit_args)
        self.assertIsInstance(as_array, np.ndarray)

        # return_array=False: an RDKit ExplicitBitVect is expected instead.
        as_bitvect = TransformationFP.fit(*fit_args, return_array=False)
        self.assertIsInstance(as_bitvect, cDataStructs.ExplicitBitVect)

    def test_fit_invalid_smiles(self):
        """``fit`` raises when both sides of the reaction are unparsable."""
        # The concrete exception type comes from the underlying methods, so
        # only the broad ``Exception`` base class is asserted here.
        self.assertRaises(
            Exception,
            TransformationFP.fit,
            "invalid_smiles>>invalid_smiles",
            self.SEPARATOR,
            self.FP_TYPE,
            True,
        )

    def test_fit_reaction_split(self):
        """A single-reactant reaction with ``abs_val=False`` still gives an array."""
        simple_reaction = "CCO>>CCN"  # Simple reaction split case
        signed_fp = TransformationFP.fit(
            simple_reaction,
            self.SEPARATOR,
            self.FP_TYPE,
            False,  # abs_val: do not take absolute values
        )
        self.assertIsInstance(signed_fp, np.ndarray)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
if __name__ == "__main__":
|
|
56
|
+
unittest.main()
|
|
File without changes
|