synkit 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synkit/Chem/Fingerprint/__init__.py +0 -0
- synkit/Chem/Fingerprint/fp_calculator.py +122 -0
- synkit/Chem/Fingerprint/smiles_featurizer.py +185 -0
- synkit/Chem/Fingerprint/transformation_fp.py +79 -0
- synkit/Chem/Molecule/__init__.py +0 -0
- synkit/Chem/Molecule/standardize.py +137 -0
- synkit/Chem/Reaction/__init__.py +0 -0
- synkit/Chem/Reaction/balance_check.py +162 -0
- synkit/Chem/Reaction/cleanning.py +59 -0
- synkit/Chem/Reaction/deionize.py +289 -0
- synkit/Chem/Reaction/neutralize.py +256 -0
- synkit/Chem/Reaction/reagent.py +102 -0
- synkit/Chem/Reaction/standardize.py +157 -0
- synkit/Chem/Reaction/tautomerize.py +168 -0
- synkit/Graph/Cluster/__init__.py +0 -0
- synkit/Graph/Cluster/morphism.py +83 -0
- synkit/Graph/Feature/__init__.py +0 -0
- synkit/Graph/Feature/graph_descriptors.py +325 -0
- synkit/Graph/Feature/graph_fps.py +97 -0
- synkit/Graph/Feature/graph_signature.py +236 -0
- synkit/Graph/Feature/hash_fps.py +130 -0
- synkit/Graph/Feature/morgan_fps.py +87 -0
- synkit/Graph/Feature/path_fps.py +82 -0
- synkit/Graph/__init.py +0 -0
- synkit/IO/__init__.py +0 -0
- synkit/IO/chem_converter.py +231 -0
- synkit/IO/data_io.py +277 -0
- synkit/IO/data_process.py +49 -0
- synkit/IO/debug.py +78 -0
- synkit/IO/dg_to_gml.py +124 -0
- synkit/IO/gml_to_nx.py +119 -0
- synkit/IO/graph_to_mol.py +110 -0
- synkit/IO/mol_to_graph.py +282 -0
- synkit/IO/nx_to_gml.py +200 -0
- synkit/IO/parse_rule.py +172 -0
- synkit/IO/smiles_to_id.py +119 -0
- synkit/ITS/_misc.py +280 -0
- synkit/ITS/aam_validator.py +254 -0
- synkit/ITS/its_builder.py +94 -0
- synkit/ITS/its_construction.py +213 -0
- synkit/ITS/normalize_aam.py +183 -0
- synkit/ITS/partial_expand.py +170 -0
- synkit/Reactor/__init__.py +0 -0
- synkit/Reactor/core_engine.py +164 -0
- synkit/Reactor/inference.py +73 -0
- synkit/Reactor/multi_step.py +227 -0
- synkit/Reactor/multi_step_aam.py +82 -0
- synkit/Reactor/reagent.py +95 -0
- synkit/Reactor/rule_apply.py +81 -0
- synkit/Vis/__init__.py +0 -0
- synkit/Vis/chemical_graph_visualizer.py +378 -0
- synkit/Vis/chemical_reaction_visualizer.py +133 -0
- synkit/Vis/chemical_space.py +83 -0
- synkit/Vis/embedding.py +92 -0
- synkit/Vis/graph_visualizer.py +286 -0
- synkit/Vis/pdf_writer.py +143 -0
- synkit/Vis/rsmi_to_fig.py +169 -0
- synkit/__init__.py +0 -0
- synkit/_misc.py +181 -0
- synkit-0.0.1.dist-info/METADATA +148 -0
- synkit-0.0.1.dist-info/RECORD +63 -0
- synkit-0.0.1.dist-info/WHEEL +4 -0
- synkit-0.0.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
import networkx as nx
|
|
2
|
+
from joblib import Parallel, delayed
|
|
3
|
+
from typing import List, Dict, Any, Union
|
|
4
|
+
from collections import Counter, OrderedDict
|
|
5
|
+
from synkit.IO.debug import setup_logging
|
|
6
|
+
from synkit.Graph.Feature.graph_signature import GraphSignature
|
|
7
|
+
|
|
8
|
+
logger = setup_logging()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class GraphDescriptor:
    """
    Stateless collection of helpers that classify graph topology (acyclic,
    single cyclic, ...) and attach the resulting descriptors to reaction
    entries holding a reaction-center graph and an ITS graph.
    """

    def __init__(self) -> None:
        # Nothing to initialise: every helper below is a static method.
        pass

    @staticmethod
    def is_graph_empty(graph: Union[nx.Graph, dict, list, Any]) -> bool:
        """
        Tell whether a graph representation contains nothing.

        Parameters:
        - graph (Union[nx.Graph, dict, list, Any]): A NetworkX graph, a dictionary,
        a list of rows, or any object exposing an 'is_empty' method.

        Returns:
        - bool: True if the representation is empty, False otherwise.

        Raises:
        - TypeError: If the representation is none of the supported kinds.
        """
        if isinstance(graph, nx.Graph):
            return graph.number_of_nodes() == 0
        if isinstance(graph, dict):
            return len(graph) == 0
        if isinstance(graph, list):
            # A list counts as empty when none of its rows has any element.
            return not any(len(row) for row in graph)
        if hasattr(graph, "is_empty"):
            return graph.is_empty()
        raise TypeError("Unsupported graph representation")

    @staticmethod
    def is_acyclic_graph(G: nx.Graph) -> bool:
        """
        Check whether the graph is a non-empty tree.

        The decision is delegated to `nx.is_tree`, so a cycle-free graph that is
        not connected (a forest) is reported as False.

        Parameters:
        - G (nx.Graph): The graph to be checked.

        Returns:
        - bool: True if the graph is acyclic, False otherwise (including when
        the graph is empty).

        Raises:
        - TypeError: If G is not a NetworkX Graph.
        """
        GraphDescriptor._validate_graph_input(G)
        if GraphDescriptor.is_graph_empty(G):
            return False
        return nx.is_tree(G)

    @staticmethod
    def is_single_cyclic_graph(G: nx.Graph) -> bool:
        """
        Check whether the graph consists of exactly one cycle.

        The graph must be non-empty and connected, every node must belong to a
        cycle of the cycle basis, and the edge count must equal the node count.

        Parameters:
        - G (nx.Graph): The graph to be checked.

        Returns:
        - bool: True if the graph is single cyclic, False otherwise.

        Raises:
        - TypeError: If G is not a NetworkX Graph.
        """
        GraphDescriptor._validate_graph_input(G)
        if GraphDescriptor.is_graph_empty(G):
            return False
        if not nx.is_connected(G):
            return False

        basis = nx.cycle_basis(G)
        if not basis:
            return False
        ring_nodes = set().union(*basis)
        if ring_nodes != set(G.nodes()):
            # At least one node hangs off the ring(s).
            return False
        return G.number_of_edges() == G.number_of_nodes()

    @staticmethod
    def is_complex_cyclic_graph(G: nx.Graph) -> bool:
        """
        Check whether the graph is made of several cycles covering all nodes.

        The graph must be non-empty and connected, its minimum cycle basis must
        hold more than one cycle, and every node must belong to one of them.

        Parameters:
        - G (nx.Graph): The graph to be checked.

        Returns:
        - bool: True if the graph is complex cyclic, False otherwise.

        Raises:
        - TypeError: If G is not a NetworkX Graph.
        """
        GraphDescriptor._validate_graph_input(G)
        if GraphDescriptor.is_graph_empty(G):
            return False
        if not nx.is_connected(G):
            return False

        basis = nx.minimum_cycle_basis(G)
        ring_nodes = {member for ring in basis for member in ring}
        if len(basis) <= 1:
            return False
        return ring_nodes == set(G.nodes())

    @staticmethod
    def check_graph_type(G: nx.Graph) -> str:
        """
        Classify the topology of the graph.

        Parameters:
        - G (nx.Graph): The graph to be checked.

        Returns:
        - str: 'Empty Graph', 'Acyclic', 'Single Cyclic', 'Combinatorial Cyclic'
        (when `is_complex_cyclic_graph` holds) or 'Complex Cyclic' for anything
        that matches none of the previous categories.

        Raises:
        - TypeError: If G is not a NetworkX Graph.
        """
        GraphDescriptor._validate_graph_input(G)
        if GraphDescriptor.is_graph_empty(G):
            return "Empty Graph"
        if GraphDescriptor.is_acyclic_graph(G):
            return "Acyclic"
        if GraphDescriptor.is_single_cyclic_graph(G):
            return "Single Cyclic"
        if GraphDescriptor.is_complex_cyclic_graph(G):
            return "Combinatorial Cyclic"
        # Fallback label for every remaining topology.
        return "Complex Cyclic"

    @staticmethod
    def get_cycle_member_rings(G: nx.Graph, type="minimal") -> List[int]:
        """
        Return the sizes of the cycles (member rings) of a cycle basis of the
        graph, sorted in ascending order.

        Parameters:
        - G (nx.Graph): The NetworkX graph to be analyzed.
        - type (str): 'minimal' selects `nx.minimum_cycle_basis`; any other value
        selects `nx.cycle_basis`.

        Returns:
        - List[int]: A sorted list of cycle sizes (member rings) found in the graph.

        Raises:
        - TypeError: If G is not a NetworkX Graph.
        """
        if not isinstance(G, nx.Graph):
            raise TypeError("Input must be a networkx Graph object.")

        basis = nx.minimum_cycle_basis(G) if type == "minimal" else nx.cycle_basis(G)
        return sorted(len(ring) for ring in basis)

    @staticmethod
    def get_element_count(graph: nx.Graph) -> Dict[str, int]:
        """
        Count how many nodes carry each value of the 'element' attribute.

        Parameters:
        - graph (nx.Graph): A NetworkX graph with 'element' attribute in nodes.

        Returns:
        - Dict[str, int]: An ordered dictionary of element counts, sorted by
        element.

        Raises:
        - KeyError: If a node has no 'element' attribute.
        """
        tally = Counter()
        for _, attrs in graph.nodes(data=True):
            tally[attrs["element"]] += 1
        return OrderedDict(sorted(tally.items()))

    @staticmethod
    def get_descriptors(
        entry: Dict,
        reaction_centers: str = "RC",
        its: str = "ITS",
        condensed: bool = True,
    ) -> Dict:
        """
        Add topology, cycle, element-count, reaction-type, step and signature
        descriptors to an entry. The entry is modified in place and returned.

        Parameters:
        - entry (Dict): A dictionary with reaction data.
        - reaction_centers (str): Key for accessing reaction center data.
        - its (str): Key for accessing ITS (Intermediate Transition State) data.
        - condensed (bool): Accepted for interface compatibility; not used by
        this method.

        Returns:
        - Dict: The enhanced entry, or the untouched entry when either graph is
        missing or empty.
        """
        rc_graph = GraphDescriptor._extract_graph(entry, reaction_centers)
        its_graph = GraphDescriptor._extract_graph(entry, its)

        # Nothing to describe when a graph is missing (None) or empty (falsy).
        if not graph_is_usable(rc_graph, its_graph):
            return entry

        # Descriptors of the reaction center graph.
        entry["topo"] = GraphDescriptor.check_graph_type(rc_graph)
        entry["cycle"] = GraphDescriptor.get_cycle_member_rings(rc_graph)
        entry["atom_count"] = GraphDescriptor.get_element_count(rc_graph)
        entry["its_count"] = GraphDescriptor.get_element_count(its_graph)

        # The reaction type follows from the reaction center topology.
        if entry["topo"] in ["Single Cyclic", "Acyclic"]:
            entry["rtype"] = "Elementary"
        else:
            entry["rtype"] = "Complicated"

        GraphDescriptor._adjust_cycle_and_step(entry, "cycle", entry["topo"])
        entry["signature_rc"] = GraphSignature(rc_graph).create_graph_signature()

        # Descriptors of the ITS graph; its topology only drives the adjustment.
        its_topology = GraphDescriptor.check_graph_type(its_graph)
        entry["cycle_its"] = GraphDescriptor.get_cycle_member_rings(its_graph)
        GraphDescriptor._adjust_cycle_and_step(
            entry, "cycle_its", its_topology, its_prefix="its"
        )

        entry["signature_its"] = GraphSignature(its_graph).create_graph_signature()

        return entry

    @staticmethod
    def _extract_graph(entry: Dict, key: str) -> Union[nx.Graph, None]:
        """
        Fetch the graph stored under a key of an entry.

        A tuple value is expected to hold the graph at index 2; a NetworkX graph
        value is returned as is. Anything else is logged as an error.

        Parameters:
        - entry (Dict): The dictionary containing graph data.
        - key (str): The key for accessing graph data.

        Returns:
        - Union[nx.Graph, None]: The extracted graph or None if unavailable.
        """
        payload = entry.get(key)
        if isinstance(payload, nx.Graph):
            return payload
        if isinstance(payload, tuple):
            try:
                return payload[2]
            except IndexError:
                logger.error(f"No graph data available at index 2 for entry {entry}")
                return None
        logger.error(f"Unsupported data type for {key} in entry {entry}")
        return None

    @staticmethod
    def _adjust_cycle_and_step(
        entry: Dict, cycle_key: str, topo_type: str, its_prefix: str = ""
    ) -> None:
        """
        Normalise the cycle descriptor for the given topology and derive the
        step count from it. The entry is updated in place.

        Parameters:
        - entry (Dict): The entry dictionary to update.
        - cycle_key (str): The key for the cycle descriptor.
        - topo_type (str): The topology type.
        - its_prefix (str): Prefix for ITS-specific descriptors; when given the
        step is stored under 'rstep_<prefix>', otherwise under 'rstep'.
        """
        if its_prefix:
            step_key = f"rstep_{its_prefix}"
        else:
            step_key = "rstep"

        # Make sure the cycle descriptor exists before it is read below.
        rings = entry.setdefault(cycle_key, [])

        if topo_type == "Acyclic":
            entry[cycle_key] = [0]
        elif topo_type == "Complex Cyclic":
            entry[cycle_key] = [0] + rings

        entry[step_key] = len(entry[cycle_key])

    @staticmethod
    def _validate_graph_input(G: nx.Graph) -> None:
        """
        Ensure the argument is a NetworkX graph.

        Parameters:
        - G (nx.Graph): The graph to validate.

        Raises:
        - TypeError: If G is not a NetworkX Graph.
        """
        if isinstance(G, nx.Graph):
            return
        raise TypeError("Input must be a NetworkX Graph object.")

    @staticmethod
    def process_entries_in_parallel(
        entries: List[Dict],
        reaction_centers: str = "RC",
        its: str = "ITS",
        condensed: bool = True,
        n_jobs: int = 4,
        verbose: int = 0,
    ) -> List[Dict]:
        """
        Run `get_descriptors` over a list of entries with joblib.

        Parameters:
        - entries (List[Dict]): List of dictionaries containing reaction data to enhance.
        - reaction_centers (str): Key to retrieve reaction center graph data from each
        entry dictionary.
        - its (str): Key to retrieve ITS (Intermediate Transition State) graph data from
        each entry dictionary.
        - condensed (bool): Forwarded unchanged to `get_descriptors`.
        - n_jobs (int): Number of jobs to run in parallel. -1 uses all processors.
        - verbose (int): The verbosity level for joblib's Parallel.

        Returns:
        - List[Dict]: A list of enhanced dictionaries with added descriptors.
        """
        describe = delayed(GraphDescriptor.get_descriptors)
        jobs = (describe(entry, reaction_centers, its, condensed) for entry in entries)
        return Parallel(n_jobs=n_jobs, verbose=verbose)(jobs)


def graph_is_usable(*graphs) -> bool:
    """Return True when none of the given graphs is falsy (None or empty)."""
    for candidate in graphs:
        if not candidate:
            return False
    return True
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def check_graph_connectivity(graph: "nx.Graph") -> str:
    """
    Check the connectivity of a NetworkX graph.

    This function assesses whether all nodes in the graph are connected by some path,
    applicable to undirected graphs.

    Parameters:
    - graph (nx.Graph): A NetworkX graph object.

    Returns:
    - str: 'Connected' if the graph is connected, otherwise 'Disconnected.'
    (the trailing period is part of the returned value). A graph without
    nodes is reported as 'Disconnected.'.

    Raises:
    - NetworkXNotImplemented: If graph is directed and does not support is_connected.
    """
    # nx.is_connected raises NetworkXPointlessConcept on a graph with no nodes;
    # report such a graph as not connected instead of failing.
    if graph.number_of_nodes() == 0:
        return "Disconnected."
    return "Connected" if nx.is_connected(graph) else "Disconnected."
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import networkx as nx
|
|
2
|
+
import hashlib
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class GraphFP:
    """
    Builds fixed-length binary-string fingerprints of a graph from its
    spectrum, adjacency matrix, degree sequence or triangle count.
    """

    def __init__(
        self, graph: "nx.Graph", nBits: int = 1024, hash_alg: str = "sha256"
    ) -> None:
        """
        Initialize the GraphFP class to create binary fingerprints based on various graph
        characteristics.

        Parameters:
        - graph (nx.Graph): Graph on which to perform analysis.
        - nBits (int): Size of the binary fingerprint in bits.
        - hash_alg (str): Name of the `hashlib` constructor used to generate
        padding bits (e.g. 'sha256').

        Raises:
        - AttributeError: If `hashlib` has no attribute named `hash_alg`.
        """
        self.graph = graph
        self.nBits = nBits
        self.hash_alg = hash_alg
        self.hash_function = getattr(hashlib, self.hash_alg)

    def fingerprint(self, method: str) -> str:
        """
        Generate a binary string fingerprint of the graph using the specified method.

        Parameters:
        - method (str): The method to use for fingerprinting
        ('spectrum', 'adjacency', 'degree', 'motif')

        Returns:
        - str: A binary string of length `nBits` that represents the fingerprint of
        the graph.

        Raises:
        - ValueError: If `method` is not one of the supported names.
        """
        if method == "spectrum":
            fp = self._spectrum_fp()
        elif method == "adjacency":
            fp = self._adjacency_fp()
        elif method == "degree":
            fp = self._degree_sequence_fp()
        elif method == "motif":
            fp = self._motif_count_fp()
        else:
            raise ValueError("Unsupported fingerprinting method.")

        # Too short: append hash-derived padding bits up to nBits.
        missing = self.nBits - len(fp)
        if missing > 0:
            fp += self.iterative_deepening(missing)

        return fp[: self.nBits]

    def _spectrum_fp(self) -> str:
        """
        Encode the adjacency-matrix eigenvalues, sorted ascending, as 8-bit
        zero-padded binary chunks of the integer part of their absolute value.
        """
        adjacency = np.asarray(nx.adjacency_matrix(self.graph).todense())
        eigenvalues = np.linalg.eigvals(adjacency)
        sorted_eigenvalues = np.sort(eigenvalues)[: self.nBits]
        eigen_str = "".join(
            bin(int(abs(eig)))[2:].zfill(8) for eig in sorted_eigenvalues
        )
        return eigen_str[: self.nBits]

    def _adjacency_fp(self) -> str:
        """
        Encode the row-major flattened adjacency matrix, one integer entry per
        position.
        """
        # np.asarray(...).ravel() always yields a 1-D array. Calling .flatten()
        # directly on an np.matrix keeps it 2-D, so iterating would yield a
        # whole row and int() would fail on it.
        entries = np.asarray(nx.adjacency_matrix(self.graph).todense()).ravel()
        adj_str = "".join(str(int(x)) for x in entries)
        return adj_str[: self.nBits]

    def _degree_sequence_fp(self) -> str:
        """
        Encode the node degrees, sorted in descending order, as 8-bit
        zero-padded binary chunks.
        """
        degrees = sorted((d for _, d in self.graph.degree()), reverse=True)
        degree_str = "".join(bin(d)[2:].zfill(8) for d in degrees)
        return degree_str[: self.nBits]

    def _motif_count_fp(self) -> str:
        """
        Encode the number of triangles in the graph as a binary number
        left-padded with zeros to `nBits`.
        """
        # nx.triangles counts each triangle once per member node.
        triangles = sum(nx.triangles(self.graph).values()) // 3
        triangle_str = bin(triangles)[2:].zfill(self.nBits)
        return triangle_str[: self.nBits]

    def iterative_deepening(self, remaining_bits: int) -> str:
        """
        Produce padding bits by repeatedly hashing the hex digest accumulated so
        far, until enough bits are available.

        The padding depends only on the hash algorithm and the requested length,
        not on the graph.

        Parameters:
        - remaining_bits (int): Number of bits needed to complete the fingerprint
        to `nBits`.

        Returns:
        - str: Exactly `remaining_bits` binary digits, or an empty string when
        `remaining_bits` is not positive.
        """
        if remaining_bits <= 0:
            # Nothing to add; also avoids int("", 16) failing below.
            return ""

        hex_digits = ""
        hasher = self.hash_function()
        while len(hex_digits) * 4 < remaining_bits:
            hasher.update(hex_digits.encode())
            hex_digits += hasher.hexdigest()

        # Every hex digit stands for 4 bits. zfill restores the leading zero
        # bits that bin() drops, so the result is never shorter than requested.
        bits = bin(int(hex_digits, 16))[2:].zfill(len(hex_digits) * 4)
        return bits[:remaining_bits]
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import networkx as nx
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class GraphSignature:
    """
    Provides methods to generate canonical signatures for graph edges (with flexible 'order' and 'state' attributes,
    and node degrees/neighbor information), a Weisfeiler-Lehman hash, and a combined graph signature.
    Aims for high uniqueness without relying solely on isomorphism checks.
    """

    def __init__(self, graph: "nx.Graph"):
        """
        Initializes the GraphSignature class with a specified graph.

        Parameters:
        - graph (nx.Graph): A NetworkX graph instance.

        Raises:
        - ValueError: If the graph fails the checks of `_validate_graph`.
        """
        self.graph = graph
        self._validate_graph()

    def _validate_graph(self):
        """
        Validates that all nodes have the required attributes ('element' and 'charge'),
        and all edges have the required 'order' attribute as int, float, or tuple of two ints/floats,
        and optionally the 'state' attribute.

        Raises:
        - ValueError: If any node is missing the 'element' or 'charge' attribute,
        if any edge is missing the 'order' attribute or has an invalid type,
        or if an edge 'state' is not 'break', 'form' or 'steady'.
        """
        for node, data in self.graph.nodes(data=True):
            if "element" not in data:
                raise ValueError(f"Node {node} is missing the 'element' attribute.")
            if "charge" not in data:
                raise ValueError(f"Node {node} is missing the 'charge' attribute.")

        for u, v, data in self.graph.edges(data=True):
            if "order" not in data:
                raise ValueError(f"Edge ({u}, {v}) is missing the 'order' attribute.")
            order = data["order"]
            if isinstance(order, tuple):
                if len(order) != 2 or not all(
                    isinstance(o, (int, float)) for o in order
                ):
                    raise ValueError(
                        f"Edge ({u}, {v}) has an invalid 'order'. It must be a tuple of two ints/floats."
                    )
            elif not isinstance(order, (int, float)):
                raise ValueError(
                    f"Edge ({u}, {v}) has an invalid 'order'. It must be an int, float, or a tuple of two ints/floats."
                )

            # 'state' is optional; a missing value is treated as 'steady'.
            state = data.get("state", "steady")
            if state not in {"break", "form", "steady"}:
                raise ValueError(
                    f"Edge ({u}, {v}) has an invalid 'state'. It must be 'break', 'form', or 'steady'."
                )

    def _node_label(self, node) -> str:
        """Return '<element><charge>' for a node ('X' and 0 when missing)."""
        attrs = self.graph.nodes[node]
        return f"{attrs.get('element', 'X')}{attrs.get('charge', 0)}"

    def _direct_neighbors_info(self, node) -> str:
        """Return the sorted, concatenated '<label>d<degree>' of a node's neighbors."""
        return "".join(
            sorted(
                f"{self._node_label(neighbor)}d{self.graph.degree(neighbor)}"
                for neighbor in self.graph.neighbors(node)
            )
        )

    def create_edge_signature(
        self, include_neighbors: bool = False, max_hop: int = 2
    ) -> str:
        """
        Generates a canonical edge signature by formatting each edge with its two node labels
        (element and charge), the bond order, and optionally neighbor information.

        Parameters:
        - include_neighbors (bool): Whether to include neighbors' details in the edge signature.
        - max_hop (int): Maximum number of hops for the k-hop neighborhood block. The block is
        only added when `include_neighbors` is True and `max_hop` is greater than 1.

        Returns:
        - str: The sorted edge representations joined with '/'.
        """
        edge_signature_parts = []

        for u, v, data in self.graph.edges(data=True):
            # Bond order: a pair is rendered as '{a,b}', a scalar as 'a'.
            order = data.get("order", (1.0, 1.0))
            if isinstance(order, tuple):
                order_str = f"{{{order[0]:.1f},{order[1]:.1f}}}"
            else:
                order_str = f"{float(order):.1f}"

            endpoints = []
            for node in (u, v):
                label = self._node_label(node)
                if include_neighbors:
                    # Direct neighbors (with their degree) in square brackets.
                    label = f"{label}[{self._direct_neighbors_info(node)}]"
                    if max_hop > 1:
                        label += f"[{self._get_khop_neighbors(node, max_hop)}]"
                endpoints.append(label)

            # Sort the two ends so the result does not depend on edge direction.
            first, second = sorted(endpoints)
            edge_signature_parts.append(f"{first}{order_str}{second}")

        # Sort the edges so the result does not depend on iteration order.
        return "/".join(sorted(edge_signature_parts))

    def _get_khop_neighbors(self, node, max_hop):
        """
        Retrieves the k-hop neighborhood information for a given node.

        Each node reachable within `max_hop` hops is reported once; the start
        node itself is not part of its own neighborhood.

        Parameters:
        - node: The node for which to get neighborhood information.
        - max_hop (int): Maximum number of hops for neighborhood exploration.

        Returns:
        - str: The sorted, concatenated '<element><charge>' labels of the neighborhood.
        """
        # The start node counts as seen from the beginning, so walking back
        # along an edge does not add it to its own neighborhood.
        seen = {node}
        k_hop_neighbors = []
        frontier = [node]
        for _ in range(max_hop):
            reached = set()
            for current in frontier:
                reached.update(self.graph.neighbors(current))
            reached -= seen
            seen |= reached
            k_hop_neighbors.extend(reached)
            frontier = reached

        return "".join(sorted(self._node_label(nb) for nb in k_hop_neighbors))

    def create_wl_hash(self, iterations: int = 3) -> str:
        """
        Generates a Weisfeiler-Lehman (WL) hash for the graph to capture its structural features.

        Node labels start as '<element><charge>'; edge attributes are not used.

        Parameters:
        - iterations (int): Number of WL iterations to perform.

        Returns:
        - str: A hexadecimal SHA-256 digest representing the WL feature.
        """
        labels = {
            node: f"{data['element']}{data.get('charge', 0)}"
            for node, data in self.graph.nodes(data=True)
        }
        for _ in range(iterations):
            new_labels = {}
            for node in self.graph.nodes():
                neighbor_labels = sorted(
                    labels[neighbor] for neighbor in self.graph.neighbors(node)
                )
                # New label = hash of own label followed by sorted neighbor labels.
                concatenated = labels[node] + "".join(neighbor_labels)
                new_labels[node] = hashlib.sha256(concatenated.encode()).hexdigest()
            labels = new_labels

        # Order-independent aggregation of the final node labels.
        aggregated = "".join(sorted(labels.values()))
        return hashlib.sha256(aggregated.encode()).hexdigest()

    def create_graph_signature(
        self,
        include_wl_hash: bool = True,
        include_neighbors: bool = True,
        max_hop: int = 1,
    ) -> str:
        """
        Combines the WL hash and the edge signature into a single graph signature.

        Parameters:
        - include_wl_hash (bool): Whether to include the Weisfeiler-Lehman hash.
        - include_neighbors (bool): Whether to include neighbor information in edge signatures.
        - max_hop (int): Forwarded to `create_edge_signature`.

        Returns:
        - str: The selected parts joined with '|', WL hash first.
        """
        signatures = []

        if include_wl_hash:
            signatures.append(f"{self.create_wl_hash()}")

        edge_signature = self.create_edge_signature(
            include_neighbors=include_neighbors, max_hop=max_hop
        )
        signatures.append(f"{edge_signature}")

        return "|".join(signatures)
|