synkit 0.0.16__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {synkit-0.0.16 → synkit-1.0.1}/PKG-INFO +3 -3
- {synkit-0.0.16 → synkit-1.0.1}/README.md +2 -2
- {synkit-0.0.16 → synkit-1.0.1}/pyproject.toml +12 -24
- synkit-1.0.1/synkit/Chem/Cluster/__init__.py +0 -0
- synkit-1.0.1/synkit/Chem/Cluster/butina.py +139 -0
- synkit-1.0.1/synkit/Chem/Fingerprint/__init__.py +0 -0
- synkit-1.0.1/synkit/Chem/Fingerprint/fp_calculator.py +155 -0
- synkit-1.0.1/synkit/Chem/Fingerprint/smiles_featurizer.py +258 -0
- synkit-1.0.1/synkit/Chem/Fingerprint/transformation_fp.py +135 -0
- synkit-1.0.1/synkit/Chem/Molecule/__init__.py +0 -0
- synkit-1.0.1/synkit/Chem/Molecule/standardize.py +167 -0
- synkit-1.0.1/synkit/Chem/Reaction/__init__.py +9 -0
- synkit-1.0.1/synkit/Chem/Reaction/aam_validator.py +264 -0
- synkit-1.0.1/synkit/Chem/Reaction/balance_check.py +138 -0
- synkit-1.0.1/synkit/Chem/Reaction/canon_rsmi.py +251 -0
- synkit-1.0.1/synkit/Chem/Reaction/cleaning.py +66 -0
- synkit-1.0.1/synkit/Chem/Reaction/deionize.py +199 -0
- synkit-1.0.1/synkit/Chem/Reaction/fix_aam.py +64 -0
- synkit-1.0.1/synkit/Chem/Reaction/neutralize.py +195 -0
- synkit-1.0.1/synkit/Chem/Reaction/radical_wildcard.py +223 -0
- synkit-1.0.1/synkit/Chem/Reaction/standardize.py +157 -0
- synkit-1.0.1/synkit/Chem/Reaction/tautomerize.py +162 -0
- synkit-1.0.1/synkit/Chem/__init__.py +1 -0
- synkit-1.0.1/synkit/Chem/utils.py +315 -0
- synkit-1.0.1/synkit/Data/__init__.py +0 -0
- synkit-1.0.1/synkit/Data/gen_partial_aam.py +147 -0
- synkit-1.0.1/synkit/Graph/Canon/__init__.py +3 -0
- synkit-1.0.1/synkit/Graph/Canon/canon_algs.py +254 -0
- synkit-1.0.1/synkit/Graph/Canon/canon_graph.py +530 -0
- synkit-1.0.1/synkit/Graph/Canon/nauty.py +320 -0
- synkit-1.0.1/synkit/Graph/Context/__init__.py +0 -0
- synkit-1.0.1/synkit/Graph/Context/hier_context.py +231 -0
- synkit-1.0.1/synkit/Graph/Context/radius_expand.py +242 -0
- synkit-1.0.1/synkit/Graph/Feature/Descriptors/topology.py +854 -0
- synkit-1.0.1/synkit/Graph/Feature/Fingerprint/__init__.py +0 -0
- synkit-1.0.1/synkit/Graph/Feature/Fingerprint/wl_rxn_fps.py +231 -0
- synkit-1.0.1/synkit/Graph/Feature/__init__.py +5 -0
- synkit-1.0.1/synkit/Graph/Feature/graph_descriptors.py +315 -0
- synkit-1.0.1/synkit/Graph/Feature/graph_fps.py +95 -0
- synkit-1.0.1/synkit/Graph/Feature/graph_signature.py +237 -0
- synkit-1.0.1/synkit/Graph/Feature/hash_fps.py +128 -0
- synkit-1.0.1/synkit/Graph/Feature/morgan_fps.py +85 -0
- synkit-1.0.1/synkit/Graph/Feature/path_fps.py +79 -0
- synkit-1.0.1/synkit/Graph/Feature/wl_hash.py +136 -0
- synkit-1.0.1/synkit/Graph/Hyrogen/__init__.py +0 -0
- synkit-1.0.1/synkit/Graph/Hyrogen/_misc.py +442 -0
- synkit-1.0.1/synkit/Graph/Hyrogen/hcomplete.py +354 -0
- synkit-1.0.1/synkit/Graph/Hyrogen/hextend.py +167 -0
- synkit-1.0.1/synkit/Graph/ITS/__init__.py +4 -0
- synkit-1.0.1/synkit/Graph/ITS/its_builder.py +114 -0
- synkit-1.0.1/synkit/Graph/ITS/its_construction.py +316 -0
- synkit-1.0.1/synkit/Graph/ITS/its_decompose.py +575 -0
- synkit-1.0.1/synkit/Graph/ITS/its_destruction.py +302 -0
- synkit-1.0.1/synkit/Graph/ITS/its_expand.py +86 -0
- synkit-1.0.1/synkit/Graph/ITS/its_relabel.py +186 -0
- synkit-1.0.1/synkit/Graph/ITS/normalize_aam.py +142 -0
- synkit-1.0.1/synkit/Graph/ITS/partial_its.py +238 -0
- synkit-1.0.1/synkit/Graph/MTG/__init__.py +0 -0
- synkit-1.0.1/synkit/Graph/MTG/group_comp.py +157 -0
- synkit-1.0.1/synkit/Graph/MTG/groupoid.py +358 -0
- synkit-1.0.1/synkit/Graph/MTG/mcs_matcher.py +248 -0
- synkit-1.0.1/synkit/Graph/MTG/mtg.py +886 -0
- synkit-1.0.1/synkit/Graph/MTG/mtg_explore.py +74 -0
- synkit-1.0.1/synkit/Graph/MTG/utils.py +425 -0
- synkit-1.0.1/synkit/Graph/Matcher/__init__.py +10 -0
- synkit-1.0.1/synkit/Graph/Matcher/automorphism.py +151 -0
- synkit-1.0.1/synkit/Graph/Matcher/batch_cluster.py +242 -0
- synkit-1.0.1/synkit/Graph/Matcher/graph_cluster.py +197 -0
- synkit-1.0.1/synkit/Graph/Matcher/graph_matcher.py +320 -0
- synkit-1.0.1/synkit/Graph/Matcher/graph_morphism.py +377 -0
- synkit-1.0.1/synkit/Graph/Matcher/mcs_matcher.py +202 -0
- synkit-1.0.1/synkit/Graph/Matcher/multi_turbo_iso.py +182 -0
- synkit-1.0.1/synkit/Graph/Matcher/partial_matcher.py +214 -0
- synkit-1.0.1/synkit/Graph/Matcher/sing.py +216 -0
- synkit-1.0.1/synkit/Graph/Matcher/subgraph_matcher.py +1162 -0
- synkit-1.0.1/synkit/Graph/Matcher/turbo_iso.py +209 -0
- synkit-1.0.1/synkit/Graph/Wildcard/__init__.py +0 -0
- synkit-1.0.1/synkit/Graph/Wildcard/fuse_graph.py +156 -0
- synkit-1.0.1/synkit/Graph/Wildcard/radwc.py +117 -0
- synkit-1.0.1/synkit/Graph/Wildcard/wildcard.py +230 -0
- synkit-1.0.1/synkit/Graph/__init__.py +17 -0
- synkit-1.0.1/synkit/Graph/canon_graph.py +530 -0
- synkit-1.0.1/synkit/Graph/syn_graph.py +155 -0
- synkit-1.0.1/synkit/Graph/utils.py +180 -0
- synkit-1.0.1/synkit/IO/__init__.py +3 -0
- synkit-1.0.1/synkit/IO/chem_converter.py +494 -0
- synkit-1.0.1/synkit/IO/combinatorial/__init__.py +8 -0
- synkit-1.0.1/synkit/IO/combinatorial/gml_to_graph.py +254 -0
- synkit-1.0.1/synkit/IO/combinatorial/graph_to_gml.py +291 -0
- synkit-1.0.1/synkit/IO/combinatorial/graph_to_smarts.py +189 -0
- synkit-1.0.1/synkit/IO/combinatorial/smarts_expander.py +152 -0
- synkit-1.0.1/synkit/IO/combinatorial/smarts_generalizer.py +134 -0
- synkit-1.0.1/synkit/IO/combinatorial/smarts_to_graph.py +183 -0
- synkit-1.0.1/synkit/IO/data_io.py +314 -0
- synkit-1.0.1/synkit/IO/data_process.py +48 -0
- synkit-1.0.1/synkit/IO/debug.py +73 -0
- synkit-1.0.1/synkit/IO/dg_to_gml.py +133 -0
- synkit-1.0.1/synkit/IO/gml_to_nx.py +151 -0
- synkit-1.0.1/synkit/IO/graph_to_mol.py +132 -0
- synkit-1.0.1/synkit/IO/mol_to_graph.py +354 -0
- synkit-1.0.1/synkit/IO/nx_to_gml.py +209 -0
- synkit-1.0.1/synkit/IO/smiles_to_id.py +118 -0
- synkit-1.0.1/synkit/Rule/Apply/__init__.py +0 -0
- synkit-1.0.1/synkit/Rule/Apply/reactor_rule.py +91 -0
- synkit-1.0.1/synkit/Rule/Apply/retro_reactor.py +213 -0
- synkit-1.0.1/synkit/Rule/Apply/rule_matcher.py +195 -0
- synkit-1.0.1/synkit/Rule/Apply/rule_rbl.py +86 -0
- synkit-1.0.1/synkit/Rule/Compose/__init__.py +0 -0
- synkit-1.0.1/synkit/Rule/Compose/compose_rule.py +226 -0
- synkit-1.0.1/synkit/Rule/Compose/rule_compose.py +236 -0
- synkit-1.0.1/synkit/Rule/Compose/rule_mapping.py +315 -0
- synkit-1.0.1/synkit/Rule/Compose/seq_comp.py +71 -0
- synkit-1.0.1/synkit/Rule/Compose/valence_constrain.py +107 -0
- synkit-1.0.1/synkit/Rule/Modify/__init__.py +0 -0
- synkit-1.0.1/synkit/Rule/Modify/implict_rule.py +65 -0
- synkit-1.0.1/synkit/Rule/Modify/longest_path.py +92 -0
- synkit-1.0.1/synkit/Rule/Modify/molecule_rule.py +112 -0
- synkit-1.0.1/synkit/Rule/Modify/prune_templates.py +75 -0
- synkit-1.0.1/synkit/Rule/Modify/rule_utils.py +193 -0
- synkit-1.0.1/synkit/Rule/Modify/strip_rule.py +97 -0
- synkit-1.0.1/synkit/Rule/__init__.py +1 -0
- synkit-1.0.1/synkit/Rule/syn_rule.py +282 -0
- synkit-1.0.1/synkit/Synthesis/CRN/__init__.py +0 -0
- synkit-1.0.1/synkit/Synthesis/CRN/crn.py +207 -0
- synkit-1.0.1/synkit/Synthesis/CRN/dcrn.py +137 -0
- synkit-1.0.1/synkit/Synthesis/CRN/mod_crn.py +160 -0
- synkit-1.0.1/synkit/Synthesis/MSR/__init__.py +0 -0
- synkit-1.0.1/synkit/Synthesis/MSR/multi_steps.py +137 -0
- synkit-1.0.1/synkit/Synthesis/MSR/path_finder.py +216 -0
- synkit-1.0.1/synkit/Synthesis/Metrics/__init__.py +0 -0
- synkit-1.0.1/synkit/Synthesis/Metrics/_base.py +49 -0
- synkit-1.0.1/synkit/Synthesis/Metrics/_plot.py +121 -0
- synkit-1.0.1/synkit/Synthesis/Metrics/_ranking.py +173 -0
- synkit-1.0.1/synkit/Synthesis/Reactor/__init__.py +0 -0
- synkit-1.0.1/synkit/Synthesis/Reactor/batch_reactor.py +462 -0
- synkit-1.0.1/synkit/Synthesis/Reactor/benchmark.py +152 -0
- synkit-1.0.1/synkit/Synthesis/Reactor/imba_engine.py +173 -0
- synkit-1.0.1/synkit/Synthesis/Reactor/mod_aam.py +279 -0
- synkit-1.0.1/synkit/Synthesis/Reactor/mod_reactor.py +428 -0
- synkit-1.0.1/synkit/Synthesis/Reactor/partial_engine.py +70 -0
- synkit-1.0.1/synkit/Synthesis/Reactor/post_syn.py +267 -0
- synkit-1.0.1/synkit/Synthesis/Reactor/rbl_engine.py +122 -0
- synkit-1.0.1/synkit/Synthesis/Reactor/rule_filter.py +195 -0
- synkit-1.0.1/synkit/Synthesis/Reactor/single_predictor.py +90 -0
- synkit-1.0.1/synkit/Synthesis/Reactor/strategy.py +51 -0
- synkit-1.0.1/synkit/Synthesis/Reactor/syn_reactor.py +609 -0
- synkit-1.0.1/synkit/Synthesis/__init__.py +0 -0
- synkit-1.0.1/synkit/Synthesis/reactor_utils.py +346 -0
- synkit-1.0.1/synkit/Utils/__init__.py +0 -0
- synkit-1.0.1/synkit/Utils/utils.py +178 -0
- synkit-1.0.1/synkit/Vis/__init__.py +5 -0
- synkit-1.0.1/synkit/Vis/chemical_space.py +83 -0
- synkit-1.0.1/synkit/Vis/embedding.py +85 -0
- synkit-1.0.1/synkit/Vis/graph_visualizer.py +382 -0
- synkit-1.0.1/synkit/Vis/pdf_writer.py +141 -0
- synkit-1.0.1/synkit/Vis/rule_vis.py +179 -0
- synkit-1.0.1/synkit/Vis/rxn_vis.py +159 -0
- synkit-1.0.1/synkit/__init__.py +0 -0
- synkit-1.0.1/synkit/examples.py +50 -0
- {synkit-0.0.16 → synkit-1.0.1}/.gitignore +0 -0
- {synkit-0.0.16 → synkit-1.0.1}/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: synkit
|
|
3
|
-
Version: 0.0.16
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: Utility for reaction modeling using graph grammar
|
|
5
5
|
Project-URL: homepage, https://github.com/TieuLongPhan/SynKit
|
|
6
6
|
Project-URL: source, https://github.com/TieuLongPhan/SynKit
|
|
@@ -44,7 +44,7 @@ Description-Content-Type: text/markdown
|
|
|
44
44
|
|
|
45
45
|
**Toolkit for Synthesis Planning**
|
|
46
46
|
|
|
47
|
-
SynKit is a collection of tools designed to support the planning and execution of chemical synthesis.
|
|
47
|
+
SynKit is a collection of tools designed to support the planning and execution of chemical synthesis. Check out the [documentation](https://tieulongphan.github.io/SynKit/) for a comprehensive description of its features.
|
|
48
48
|
|
|
49
49
|

|
|
50
50
|
|
|
@@ -96,7 +96,7 @@ For more details on each utility within the repository, please refer to the docu
|
|
|
96
96
|
```bash
|
|
97
97
|
docker pull tieulongphan/synkit:latest
|
|
98
98
|
# or a specific version:
|
|
99
|
-
docker pull tieulongphan/synkit:
|
|
99
|
+
docker pull tieulongphan/synkit:1.0.0
|
|
100
100
|
```
|
|
101
101
|
Run a container (sanity check):
|
|
102
102
|
```
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
|
|
14
14
|
**Toolkit for Synthesis Planning**
|
|
15
15
|
|
|
16
|
-
SynKit is a collection of tools designed to support the planning and execution of chemical synthesis.
|
|
16
|
+
SynKit is a collection of tools designed to support the planning and execution of chemical synthesis. Check out the [documentation](https://tieulongphan.github.io/SynKit/) for a comprehensive description of its features.
|
|
17
17
|
|
|
18
18
|

|
|
19
19
|
|
|
@@ -65,7 +65,7 @@ For more details on each utility within the repository, please refer to the docu
|
|
|
65
65
|
```bash
|
|
66
66
|
docker pull tieulongphan/synkit:latest
|
|
67
67
|
# or a specific version:
|
|
68
|
-
docker pull tieulongphan/synkit:
|
|
68
|
+
docker pull tieulongphan/synkit:1.0.0
|
|
69
69
|
```
|
|
70
70
|
Run a container (sanity check):
|
|
71
71
|
```
|
|
@@ -4,17 +4,17 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "synkit"
|
|
7
|
-
version = "0.0.16"
|
|
8
|
-
license = { text = "MIT" }
|
|
9
|
-
license-files = ["LICENSE"]
|
|
10
|
-
authors = [
|
|
11
|
-
{ name = "Tieu Long Phan", email = "tieu@bioinf.uni-leipzig.de" }
|
|
12
|
-
]
|
|
7
|
+
version = "1.0.1"
|
|
13
8
|
description = "Utility for reaction modeling using graph grammar"
|
|
14
9
|
readme = "README.md"
|
|
15
10
|
long-description = { file = "CHANGELOG.md" }
|
|
16
11
|
long-description-content-type = "text/markdown"
|
|
17
12
|
requires-python = ">=3.11"
|
|
13
|
+
license = { text = "MIT" }
|
|
14
|
+
license-files = ["LICENSE"]
|
|
15
|
+
authors = [
|
|
16
|
+
{ name = "Tieu Long Phan", email = "tieu@bioinf.uni-leipzig.de" }
|
|
17
|
+
]
|
|
18
18
|
classifiers = [
|
|
19
19
|
"Programming Language :: Python :: 3",
|
|
20
20
|
"License :: OSI Approved :: MIT License",
|
|
@@ -39,28 +39,16 @@ docs = [
|
|
|
39
39
|
]
|
|
40
40
|
|
|
41
41
|
[project.urls]
|
|
42
|
-
homepage
|
|
43
|
-
source
|
|
44
|
-
issues
|
|
42
|
+
homepage = "https://github.com/TieuLongPhan/SynKit"
|
|
43
|
+
source = "https://github.com/TieuLongPhan/SynKit"
|
|
44
|
+
issues = "https://github.com/TieuLongPhan/SynKit/issues"
|
|
45
45
|
documentation = "https://tieulongphan.github.io/SynKit/"
|
|
46
46
|
|
|
47
47
|
[tool.hatch.build]
|
|
48
|
-
|
|
49
|
-
include = [
|
|
50
|
-
{ path = "synkit/Data/*.json" },
|
|
51
|
-
{ path = "synkit/Data/*.json.gz" }
|
|
52
|
-
]
|
|
48
|
+
packages = ["synkit"]
|
|
53
49
|
|
|
54
50
|
[tool.hatch.build.targets.wheel]
|
|
55
|
-
|
|
56
|
-
include = [
|
|
57
|
-
"synkit/Data/*.json",
|
|
58
|
-
"synkit/Data/*.json.gz"
|
|
59
|
-
]
|
|
51
|
+
include = ["synkit/Data/**"]
|
|
60
52
|
|
|
61
53
|
[tool.hatch.build.targets.sdist]
|
|
62
|
-
|
|
63
|
-
include = [
|
|
64
|
-
"synkit/Data/*.json",
|
|
65
|
-
"synkit/Data/*.json.gz"
|
|
66
|
-
]
|
|
54
|
+
include = ["synkit/Data/**"]
|
|
File without changes
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from rdkit.DataStructs import cDataStructs, CreateFromBitString, BulkTanimotoSimilarity
|
|
6
|
+
from rdkit.ML.Cluster import Butina
|
|
7
|
+
from sklearn.manifold import TSNE
|
|
8
|
+
import matplotlib.pyplot as plt
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ButinaCluster:
    """Cluster chemical fingerprint vectors using the Butina algorithm from
    RDKit, with integrated t-SNE visualization of clusters.

    Key features
    ------------
    * **Butina clustering** – fast neighbour-list clustering with a distance cutoff.
    * **t-SNE visualization** – 2D embedding of fingerprints, highlighting top‑k clusters.
    * **NumPy support** – accepts 2D arrays of 0/1 fingerprint data.
    * **Configurable** – user‑defined cutoff, perplexity, and top‑k highlight.

    Quick start
    -----------
    >>> from synkit.Chem.Cluster.butina import ButinaCluster
    >>> clusters = ButinaCluster.cluster(arr, cutoff=0.3)
    >>> ButinaCluster.visualize(arr, clusters, k=5)
    """

    @staticmethod
    def cluster(arr: np.ndarray, cutoff: float = 0.2) -> List[List[int]]:
        """Perform Butina clustering on fingerprint bit-vectors.

        :param arr: 2D array of shape (n_samples, n_bits) with 0/1
            dtype. Each entry is cast with ``int()`` before being written
            into the bit string.
        :type arr: np.ndarray
        :param cutoff: Distance cutoff (1 – Tanimoto similarity) to form
            clusters. Defaults to 0.2.
        :type cutoff: float
        :returns: Clusters exactly as returned by
            ``rdkit.ML.Cluster.Butina.ClusterData``; each cluster is a
            sequence of sample indices (row positions in ``arr``).
        :rtype: list of list of int
        """
        # Convert every row to an RDKit ExplicitBitVect via its "0101..." string.
        fps: List[cDataStructs.ExplicitBitVect] = [
            CreateFromBitString("".join(str(int(b)) for b in row.tolist()))
            for row in arr
        ]

        n = len(fps)
        # Butina.ClusterData consumes the condensed distance list in
        # lower-triangular order: d(1,0), d(2,0), d(2,1), d(3,0), ...
        # Comparing row i against all *earlier* rows produces exactly that
        # ordering; comparing against later rows (upper-triangular order)
        # would pair distances with the wrong indices once n >= 4.
        distances: List[float] = []
        for i in range(1, n):
            sims = BulkTanimotoSimilarity(fps[i], fps[:i])
            distances.extend(1.0 - float(s) for s in sims)

        # ClusterData(distanceList, nPts, cutoff, isDistData)
        clusters = Butina.ClusterData(distances, n, cutoff, True)
        return clusters

    @staticmethod
    def visualize(
        arr: np.ndarray,
        clusters: List[List[int]],
        k: Optional[int] = None,
        perplexity: float = 30.0,
        random_state: int = 42,
    ) -> None:
        """Visualize clusters in 2D via t-SNE embedding.

        :param arr: 2D array of shape (n_samples, n_features) with fingerprint data.
        :type arr: np.ndarray
        :param clusters: Clusters as returned by `cluster()`.
        :type clusters: list of list of int
        :param k: If provided, highlight only the top‑k largest clusters; others shown as 'Other'.
        :type k: int or None
        :param perplexity: t-SNE perplexity parameter. Defaults to 30.0.
        :type perplexity: float
        :param random_state: Random seed for reproducibility. Defaults to 42.
        :type random_state: int
        :returns: None
        :rtype: NoneType

        :example:
        >>> clusters = ButinaCluster.cluster(arr, cutoff=0.3)
        >>> ButinaCluster.visualize(arr, clusters, k=5)
        """
        n = arr.shape[0]
        # One label per sample: the cluster's position in `clusters`, or -1
        # for samples that are not in a highlighted cluster ('Other').
        labels = np.full(n, -1, dtype=int)
        # Cluster positions ordered from largest to smallest cluster.
        sorted_idx = sorted(
            range(len(clusters)), key=lambda i: len(clusters[i]), reverse=True
        )
        top = set(sorted_idx[:k]) if k is not None else set(sorted_idx)
        for idx, cluster in enumerate(clusters):
            for i in cluster:
                labels[i] = idx if idx in top else -1

        # Compute the 2D t-SNE embedding of the raw fingerprint rows.
        tsne = TSNE(n_components=2, perplexity=perplexity, random_state=random_state)
        emb = tsne.fit_transform(arr)

        # One scatter series per label so each cluster gets its own legend entry.
        plt.figure(figsize=(8, 6))
        unique = sorted(set(labels))
        for lab in unique:
            mask = labels == lab
            if lab == -1:
                plt.scatter(
                    emb[mask, 0], emb[mask, 1], color="gray", alpha=0.3, label="Other"
                )
            else:
                plt.scatter(
                    emb[mask, 0], emb[mask, 1], alpha=0.7, label=f"Cluster {lab}"
                )
        plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
        plt.title("t-SNE visualization of Butina clusters")
        plt.xlabel("t-SNE dim 1")
        plt.ylabel("t-SNE dim 2")
        plt.tight_layout()
        plt.show()

    def __str__(self) -> str:
        """Short description of the clusterer.

        :returns: Class name.
        :rtype: str
        """
        return "<ButinaCluster>"

    def help(self) -> None:
        """Print usage summary for clustering and visualization.

        :returns: None
        :rtype: NoneType
        """
        print("ButinaCluster.cluster(arr, cutoff=0.2)")
        print("ButinaCluster.visualize(arr, clusters, k=None, perplexity=30.0)")
|
|
File without changes
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Any, Dict, List
|
|
3
|
+
from joblib import Parallel, delayed
|
|
4
|
+
|
|
5
|
+
from synkit.IO.debug import configure_warnings_and_logs
|
|
6
|
+
from synkit.Chem.Fingerprint.transformation_fp import TransformationFP
|
|
7
|
+
|
|
8
|
+
# Runs once at import time: calls synkit.IO.debug.configure_warnings_and_logs
# with both positional flags set to True.
# NOTE(review): presumably this suppresses warnings/log output from the
# fingerprint backends for the whole process - confirm against synkit.IO.debug.
configure_warnings_and_logs(True, True)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class FPCalculator:
    """Calculate fingerprint vectors for chemical reactions represented by
    SMILES strings.

    :cvar fps: Shared fingerprint engine instance.
    :vartype fps: TransformationFP
    :cvar VALID_FP_TYPES: Supported fingerprint type identifiers.
    :vartype VALID_FP_TYPES: List[str]
    :param n_jobs: Number of parallel jobs to use for batch processing.
    :type n_jobs: int
    :param verbose: Verbosity level for parallel execution.
    :type verbose: int
    """

    # Created once when the class body executes and shared by every call.
    fps: TransformationFP = TransformationFP()
    VALID_FP_TYPES: List[str] = [
        "drfp",
        "avalon",
        "maccs",
        "torsion",
        "pharm2D",
        "ecfp2",
        "ecfp4",
        "ecfp6",
        "fcfp2",
        "fcfp4",
        "fcfp6",
        "rdk5",
        "rdk6",
        "rdk7",
        "ap",
    ]

    def __init__(self, n_jobs: int = 1, verbose: int = 0) -> None:
        """Initialize the FPCalculator.

        :param n_jobs: Number of parallel jobs to use for fingerprint
            computation.
        :type n_jobs: int
        :param verbose: Verbosity level for the parallel processing.
        :type verbose: int
        """
        self.n_jobs = n_jobs
        self.verbose = verbose

    def _validate_fp_type(self, fp_type: str) -> None:
        """Ensure the requested fingerprint type is supported.

        The comparison is case-sensitive (note the ``"pharm2D"`` entry).

        :param fp_type: Fingerprint type identifier to validate.
        :type fp_type: str
        :raises ValueError: If `fp_type` is not in VALID_FP_TYPES.
        """
        if fp_type not in self.VALID_FP_TYPES:
            valid = ", ".join(self.VALID_FP_TYPES)
            raise ValueError(
                f"Unsupported fingerprint type '{fp_type}'. Supported types: {valid}."
            )

    @staticmethod
    def dict_process(
        data_dict: Dict[str, Any],
        rsmi_key: str,
        symbol: str = ">>",
        fp_type: str = "ecfp4",
        absolute: bool = True,
    ) -> Dict[str, Any]:
        """Compute a fingerprint for a single reaction SMILES entry and add it
        to the dict.

        The input dictionary is modified in place and also returned. This
        method does not validate `fp_type` itself; `parallel_process` does
        that once for the whole batch.

        :param data_dict: Dictionary containing reaction data.
        :type data_dict: dict
        :param rsmi_key: Key in `data_dict` for the reaction SMILES string.
        :type rsmi_key: str
        :param symbol: Delimiter between reactant and product in the SMILES.
        :type symbol: str
        :param fp_type: Fingerprint type to compute.
        :type fp_type: str
        :param absolute: Whether to take absolute values of the fingerprint difference.
        :type absolute: bool
        :returns: The input dictionary with the fingerprint vector stored under
            the key `fp_type` itself (e.g. ``"ecfp4"``), overwriting any
            existing entry with that name.
        :rtype: dict
        :raises ValueError: If `rsmi_key` is missing in `data_dict`.
        """
        if rsmi_key not in data_dict:
            raise ValueError(f"Key '{rsmi_key}' not found in data dictionary.")
        # Delegate to the shared engine; keyword names follow TransformationFP.fit.
        vec = FPCalculator.fps.fit(
            data_dict[rsmi_key], symbols=symbol, fp_type=fp_type, abs=absolute
        )
        data_dict[fp_type] = vec
        return data_dict

    def parallel_process(
        self,
        data_dicts: List[Dict[str, Any]],
        rsmi_key: str,
        symbol: str = ">>",
        fp_type: str = "ecfp4",
        absolute: bool = True,
    ) -> List[Dict[str, Any]]:
        """Compute fingerprints for a batch of reaction dictionaries in
        parallel.

        :param data_dicts: List of dictionaries, each containing a reaction SMILES.
        :type data_dicts: list of dict
        :param rsmi_key: Key in each dict for the reaction SMILES string.
        :type rsmi_key: str
        :param symbol: Delimiter between reactant and product in the SMILES.
        :type symbol: str
        :param fp_type: Fingerprint type to compute.
        :type fp_type: str
        :param absolute: Whether to take absolute values of the fingerprint difference.
        :type absolute: bool
        :returns: A list of dictionaries, each augmented with an entry keyed by
            `fp_type`. Use the returned list rather than relying on the input
            dictionaries being updated: depending on the joblib backend the
            workers may operate on copies.
        :rtype: list of dict
        :raises ValueError: If `fp_type` is unsupported or any dict is missing `rsmi_key`.
        """
        # Validate the fingerprint type once, before any worker is started.
        self._validate_fp_type(fp_type)

        # Fan the per-dictionary work out through joblib.
        results = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
            delayed(self.dict_process)(dd, rsmi_key, symbol, fp_type, absolute)
            for dd in data_dicts
        )
        return results

    def __str__(self) -> str:
        """Short string summarizing the calculator configuration.

        :returns: A summary of n_jobs and verbosity.
        :rtype: str
        """
        return f"<FPCalculator n_jobs={self.n_jobs} verbose={self.verbose}>"

    def help(self) -> None:
        """Print details about supported fingerprint types and usage.

        :returns: None
        :rtype: NoneType
        """
        print("FPCalculator supports the following fingerprint types:")
        for t in self.VALID_FP_TYPES:
            print("  -", t)
        print(f"Configured for {self.n_jobs} parallel jobs, verbose={self.verbose}")
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""smiles_featurizer.py
|
|
2
|
+
=======================
|
|
3
|
+
Utility for converting SMILES strings into various cheminformatics fingerprints,
|
|
4
|
+
with optional NumPy‐array conversion.
|
|
5
|
+
|
|
6
|
+
Key features
|
|
7
|
+
------------
|
|
8
|
+
* **Multi‐fingerprint support** – MACCS, Avalon, ECFP/FCFP, RDKit, AtomPair, Torsion, Pharm2D
|
|
9
|
+
* **SMILES validation** – raises on invalid input
|
|
10
|
+
* **Array conversion** – output as NumPy arrays for ML pipelines
|
|
11
|
+
* **Extensible** – add new methods or override via subclassing
|
|
12
|
+
|
|
13
|
+
Quick start
|
|
14
|
+
-----------
|
|
15
|
+
>>> from synkit.Chem.Fingerprint.smiles_featurizer import SmilesFeaturizer
|
|
16
|
+
>>> arr = SmilesFeaturizer.featurize_smiles("CCO", "ecfp4", convert_to_array=True)
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
import numpy as np
|
|
23
|
+
from rdkit import Chem, DataStructs
|
|
24
|
+
from rdkit.Chem import AllChem, MACCSkeys
|
|
25
|
+
from rdkit.Chem.AtomPairs import Pairs, Torsions
|
|
26
|
+
from rdkit.Avalon import pyAvalonTools as fpAvalon
|
|
27
|
+
from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SmilesFeaturizer:
    """Convert SMILES strings into chemical fingerprint vectors.

    This class only provides static/class methods and holds no state.

    Supported fingerprint methods:
      - MACCS keys
      - Avalon
      - ECFP/FCFP (Morgan)
      - RDKit topological
      - AtomPair
      - Torsion
      - 2D Pharmacophore

    Use `featurize_smiles` for one‑line access.
    """

    def __init__(self) -> None:
        """Initialize SmilesFeaturizer.

        This class has no instance state; all methods are static or
        class‑level.
        """
        pass

    @staticmethod
    def smiles_to_mol(smiles: str) -> Chem.Mol:
        """Convert a SMILES string to an RDKit Mol object.

        :param smiles: The SMILES string to convert.
        :type smiles: str
        :returns: RDKit Mol object corresponding to the SMILES.
        :rtype: Chem.Mol
        :raises ValueError: If the SMILES string is invalid.
        """
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise ValueError(f"Invalid SMILES string: {smiles!r}")
        return mol

    @staticmethod
    def get_maccs_keys(mol: Chem.Mol) -> Any:
        """Generate the MACCS keys fingerprint for a molecule.

        :param mol: RDKit Mol object.
        :type mol: Chem.Mol
        :returns: MACCS keys fingerprint bit vector.
        :rtype: ExplicitBitVect
        """
        return MACCSkeys.GenMACCSKeys(mol)

    @staticmethod
    def get_avalon_fp(mol: Chem.Mol, nBits: int = 1024) -> Any:
        """Generate the Avalon fingerprint for a molecule.

        :param mol: RDKit Mol object.
        :type mol: Chem.Mol
        :param nBits: Length of the fingerprint vector.
        :type nBits: int
        :returns: Avalon fingerprint bit vector.
        :rtype: ExplicitBitVect
        """
        return fpAvalon.GetAvalonFP(mol, nBits)

    @staticmethod
    def get_ecfp(
        mol: Chem.Mol, radius: int, nBits: int = 2048, useFeatures: bool = False
    ) -> Any:
        """Generate a Morgan fingerprint (ECFP or FCFP) for a molecule.

        :param mol: RDKit Mol object.
        :type mol: Chem.Mol
        :param radius: Radius for the Morgan algorithm.
        :type radius: int
        :param nBits: Length of the fingerprint vector.
        :type nBits: int
        :param useFeatures: If True, generate a Feature‑Class
            fingerprint (FCFP).
        :type useFeatures: bool
        :returns: Morgan fingerprint bit vector.
        :rtype: ExplicitBitVect
        """
        return AllChem.GetMorganFingerprintAsBitVect(
            mol, radius, nBits=nBits, useFeatures=useFeatures
        )

    @staticmethod
    def get_rdk_fp(
        mol: Chem.Mol, maxPath: int, fpSize: int = 2048, nBitsPerHash: int = 2
    ) -> Any:
        """Generate an RDKit topological fingerprint for a molecule.

        :param mol: RDKit Mol object.
        :type mol: Chem.Mol
        :param maxPath: Maximum path length (bonds) to include.
        :type maxPath: int
        :param fpSize: Length of the fingerprint vector.
        :type fpSize: int
        :param nBitsPerHash: Bits per hash for path hashing.
        :type nBitsPerHash: int
        :returns: RDKit topological fingerprint bit vector.
        :rtype: ExplicitBitVect
        """
        return Chem.RDKFingerprint(
            mol, maxPath=maxPath, fpSize=fpSize, nBitsPerHash=nBitsPerHash
        )

    @staticmethod
    def mol_to_ap(mol: Chem.Mol) -> Any:
        """Generate an Atom Pair fingerprint for a molecule.

        :param mol: RDKit Mol object.
        :type mol: Chem.Mol
        :returns: Atom Pair fingerprint, exactly as returned by
            ``Pairs.GetAtomPairFingerprint``.
        :rtype: Any
        """
        return Pairs.GetAtomPairFingerprint(mol)

    @staticmethod
    def mol_to_torsion(mol: Chem.Mol) -> Any:
        """Generate a Topological Torsion fingerprint for a molecule.

        :param mol: RDKit Mol object.
        :type mol: Chem.Mol
        :returns: Torsion fingerprint, exactly as returned by
            ``Torsions.GetTopologicalTorsionFingerprintAsIntVect``.
        :rtype: Any
        """
        return Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol)

    @staticmethod
    def mol_to_pharm2d(mol: Chem.Mol) -> Any:
        """Generate a 2D Pharmacophore fingerprint for a molecule.

        :param mol: RDKit Mol object.
        :type mol: Chem.Mol
        :returns: 2D pharmacophore fingerprint built with the Gobbi factory.
        :rtype: Any
        """
        return Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory)

    @staticmethod
    def _size_from_spec(
        ft: str, prefix_len: int, override: int | None, original: str
    ) -> int:
        """Resolve the numeric parameter of an 'ecfp#' / 'fcfp#' / 'rdk#' spec.

        :param ft: Lower-cased fingerprint type.
        :param prefix_len: Number of leading characters that form the family name.
        :param override: Explicit value supplied through kwargs, or None.
        :param original: Fingerprint type as given by the caller (for messages).
        :returns: `override` when given, otherwise the integer after the prefix.
        :raises ValueError: If the suffix is not a plain decimal number, or if
            neither a suffix nor an override is available.
        """
        suffix = ft[prefix_len:]
        if suffix and not (suffix.isascii() and suffix.isdigit()):
            raise ValueError(f"Unsupported fingerprint type: {original!r}")
        if override is not None:
            return int(override)
        if not suffix:
            raise ValueError(f"Unsupported fingerprint type: {original!r}")
        # Use the whole suffix so that e.g. 'ecfp10' means 10, not 1.
        return int(suffix)

    @classmethod
    def featurize_smiles(
        cls,
        smiles: str,
        fingerprint_type: str,
        convert_to_array: bool = True,
        **kwargs: Any,
    ) -> Any:
        """Featurize a SMILES string into a chosen fingerprint, optionally
        converting to a NumPy array.

        :param smiles: The SMILES string to featurize.
        :type smiles: str
        :param fingerprint_type: One of 'maccs', 'avalon', 'ecfp#', 'fcfp#',
            'rdk#', 'ap', 'torsion', 'pharm2d' (case-insensitive). For
            'ecfp#'/'fcfp#' the number is passed to the Morgan algorithm as
            its radius; for 'rdk#' it is the maximum path length.
        :type fingerprint_type: str
        :param convert_to_array: If True, convert the result to a NumPy array.
        :type convert_to_array: bool
        :param kwargs: Additional parameters passed to the chosen method:
            - `nBits` for Avalon/ECFP/FCFP
            - `radius` for ECFP/FCFP (overrides the number in the type name)
            - `maxPath` (overrides the number in the type name), `fpSize`,
              `nBitsPerHash` for RDKit FP
        :type kwargs: dict
        :returns: Fingerprint as a NumPy array (if `convert_to_array`) or RDKit bit vector.
        :rtype: np.ndarray or ExplicitBitVect
        :raises ValueError: If `fingerprint_type` is unsupported or malformed,
            or if `smiles` cannot be parsed.
        """
        ft = fingerprint_type.lower()

        # Validate the fingerprint spec first: it is cheap, and a malformed
        # spec should fail with ValueError rather than IndexError.
        radius = None
        max_path = None
        if ft.startswith(("ecfp", "fcfp")):
            # NOTE(review): the number is used directly as the Morgan radius,
            # whereas the usual ECFP4 naming refers to diameter 4 (radius 2).
            # Left unchanged here - confirm the intended convention.
            radius = cls._size_from_spec(
                ft, 4, kwargs.get("radius"), fingerprint_type
            )
        elif ft.startswith("rdk"):
            max_path = cls._size_from_spec(
                ft, 3, kwargs.get("maxPath"), fingerprint_type
            )
        elif ft not in ("maccs", "avalon", "ap", "torsion", "pharm2d"):
            raise ValueError(f"Unsupported fingerprint type: {fingerprint_type!r}")

        mol = cls.smiles_to_mol(smiles)

        if ft == "maccs":
            fp = cls.get_maccs_keys(mol)
        elif ft == "avalon":
            fp = cls.get_avalon_fp(mol, nBits=kwargs.get("nBits", 1024))
        elif radius is not None:
            fp = cls.get_ecfp(
                mol,
                radius,
                nBits=kwargs.get("nBits", 2048),
                useFeatures=ft.startswith("fcfp"),
            )
        elif max_path is not None:
            fp = cls.get_rdk_fp(
                mol,
                maxPath=max_path,
                fpSize=kwargs.get("fpSize", 2048),
                nBitsPerHash=kwargs.get("nBitsPerHash", 2),
            )
        elif ft == "ap":
            fp = cls.mol_to_ap(mol)
        elif ft == "torsion":
            fp = cls.mol_to_torsion(mol)
        else:
            fp = cls.mol_to_pharm2d(mol)

        if convert_to_array:
            if ft == "pharm2d":
                # The pharmacophore fingerprint is converted through its
                # bit-string form instead of ConvertToNumpyArray.
                bitstr = fp.ToBitString()
                return np.array([int(b) for b in bitstr], dtype=np.int8)
            # NOTE(review): 'ap' and 'torsion' come back from RDKit as sparse
            # integer vectors; verify that GetNumBits()/ConvertToNumpyArray
            # are available for those types, otherwise pass
            # convert_to_array=False for them.
            arr = np.zeros((fp.GetNumBits(),), dtype=np.int8)
            DataStructs.ConvertToNumpyArray(fp, arr)
            return arr

        return fp

    def __str__(self) -> str:
        """Short description of the featurizer.

        :returns: Class name.
        :rtype: str
        """
        return "<SmilesFeaturizer>"

    def help(self) -> None:
        """Print supported fingerprint types and usage summary.

        :returns: None
        :rtype: NoneType
        """
        print("SmilesFeaturizer supports the following fingerprint types:")
        print("  - maccs, avalon, ecfp#, fcfp#, rdk#, ap, torsion, pharm2d")
        print(
            "Usage: SmilesFeaturizer.featurize_smiles(smiles, fingerprint_type, **kwargs)"
        )
|