synkit 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. synkit/Chem/Fingerprint/__init__.py +0 -0
  2. synkit/Chem/Fingerprint/fp_calculator.py +122 -0
  3. synkit/Chem/Fingerprint/smiles_featurizer.py +185 -0
  4. synkit/Chem/Fingerprint/transformation_fp.py +79 -0
  5. synkit/Chem/Molecule/__init__.py +0 -0
  6. synkit/Chem/Molecule/standardize.py +137 -0
  7. synkit/Chem/Reaction/__init__.py +0 -0
  8. synkit/Chem/Reaction/balance_check.py +162 -0
  9. synkit/Chem/Reaction/cleanning.py +59 -0
  10. synkit/Chem/Reaction/deionize.py +289 -0
  11. synkit/Chem/Reaction/neutralize.py +256 -0
  12. synkit/Chem/Reaction/reagent.py +102 -0
  13. synkit/Chem/Reaction/standardize.py +157 -0
  14. synkit/Chem/Reaction/tautomerize.py +168 -0
  15. synkit/Graph/Cluster/__init__.py +0 -0
  16. synkit/Graph/Cluster/morphism.py +83 -0
  17. synkit/Graph/Feature/__init__.py +0 -0
  18. synkit/Graph/Feature/graph_descriptors.py +325 -0
  19. synkit/Graph/Feature/graph_fps.py +97 -0
  20. synkit/Graph/Feature/graph_signature.py +236 -0
  21. synkit/Graph/Feature/hash_fps.py +130 -0
  22. synkit/Graph/Feature/morgan_fps.py +87 -0
  23. synkit/Graph/Feature/path_fps.py +82 -0
  24. synkit/Graph/__init.py +0 -0
  25. synkit/IO/__init__.py +0 -0
  26. synkit/IO/chem_converter.py +231 -0
  27. synkit/IO/data_io.py +277 -0
  28. synkit/IO/data_process.py +49 -0
  29. synkit/IO/debug.py +78 -0
  30. synkit/IO/dg_to_gml.py +124 -0
  31. synkit/IO/gml_to_nx.py +119 -0
  32. synkit/IO/graph_to_mol.py +110 -0
  33. synkit/IO/mol_to_graph.py +282 -0
  34. synkit/IO/nx_to_gml.py +200 -0
  35. synkit/IO/parse_rule.py +172 -0
  36. synkit/IO/smiles_to_id.py +119 -0
  37. synkit/ITS/_misc.py +280 -0
  38. synkit/ITS/aam_validator.py +254 -0
  39. synkit/ITS/its_builder.py +94 -0
  40. synkit/ITS/its_construction.py +213 -0
  41. synkit/ITS/normalize_aam.py +183 -0
  42. synkit/ITS/partial_expand.py +170 -0
  43. synkit/Reactor/__init__.py +0 -0
  44. synkit/Reactor/core_engine.py +164 -0
  45. synkit/Reactor/inference.py +73 -0
  46. synkit/Reactor/multi_step.py +227 -0
  47. synkit/Reactor/multi_step_aam.py +82 -0
  48. synkit/Reactor/reagent.py +95 -0
  49. synkit/Reactor/rule_apply.py +81 -0
  50. synkit/Vis/__init__.py +0 -0
  51. synkit/Vis/chemical_graph_visualizer.py +378 -0
  52. synkit/Vis/chemical_reaction_visualizer.py +133 -0
  53. synkit/Vis/chemical_space.py +83 -0
  54. synkit/Vis/embedding.py +92 -0
  55. synkit/Vis/graph_visualizer.py +286 -0
  56. synkit/Vis/pdf_writer.py +143 -0
  57. synkit/Vis/rsmi_to_fig.py +169 -0
  58. synkit/__init__.py +0 -0
  59. synkit/_misc.py +181 -0
  60. synkit-0.0.1.dist-info/METADATA +148 -0
  61. synkit-0.0.1.dist-info/RECORD +63 -0
  62. synkit-0.0.1.dist-info/WHEEL +4 -0
  63. synkit-0.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,325 @@
1
+ import networkx as nx
2
+ from joblib import Parallel, delayed
3
+ from typing import List, Dict, Any, Union
4
+ from collections import Counter, OrderedDict
5
+ from synkit.IO.debug import setup_logging
6
+ from synkit.Graph.Feature.graph_signature import GraphSignature
7
+
8
+ logger = setup_logging()
9
+
10
+
11
class GraphDescriptor:
    """
    Namespace of static helpers that classify graph topology and attach the
    resulting descriptors (topology label, ring sizes, element counts, step
    counts and graph signatures) to reaction entry dictionaries.
    """

    def __init__(self) -> None:
        pass

    @staticmethod
    def is_graph_empty(graph: Union[nx.Graph, dict, list, Any]) -> bool:
        """
        Report whether a graph representation holds no content.

        Parameters:
        - graph (Union[nx.Graph, dict, list, Any]): A NetworkX graph (empty when it
        has no nodes), a dict (empty when it has no keys), a list of sized rows
        (empty when every row has length 0), or any object exposing `is_empty()`.

        Returns:
        - bool: True if the representation is empty, False otherwise.

        Raises:
        - TypeError: If none of the supported representations matches.
        """
        if isinstance(graph, nx.Graph):
            return graph.number_of_nodes() == 0
        if isinstance(graph, dict):
            return len(graph) == 0
        if isinstance(graph, list):
            # An empty list, or a list made only of empty rows, counts as empty.
            return not any(len(row) for row in graph)
        if hasattr(graph, "is_empty"):
            return graph.is_empty()
        raise TypeError("Unsupported graph representation")

    @staticmethod
    def is_acyclic_graph(G: nx.Graph) -> bool:
        """
        Report whether a non-empty graph is a tree.

        The check delegates to `nx.is_tree`, so an acyclic but disconnected graph
        (a forest) is reported as False, as is an empty graph.

        Parameters:
        - G (nx.Graph): The graph to be checked.

        Returns:
        - bool: True if the graph is non-empty and a tree, False otherwise.

        Raises:
        - TypeError: If G is not a NetworkX Graph.
        """
        GraphDescriptor._validate_graph_input(G)
        if GraphDescriptor.is_graph_empty(G):
            return False
        return nx.is_tree(G)

    @staticmethod
    def is_single_cyclic_graph(G: nx.Graph) -> bool:
        """
        Report whether the graph consists of exactly one cycle.

        The graph must be non-empty and connected, every node must appear in the
        cycle basis, and the number of edges must equal the number of nodes.

        Parameters:
        - G (nx.Graph): The graph to be checked.

        Returns:
        - bool: True if the graph is single cyclic, False otherwise.

        Raises:
        - TypeError: If G is not a NetworkX Graph.
        """
        GraphDescriptor._validate_graph_input(G)
        if GraphDescriptor.is_graph_empty(G):
            return False
        if not nx.is_connected(G):
            return False

        basis = nx.cycle_basis(G)
        if not basis:
            return False
        ring_nodes = set().union(*basis)
        if ring_nodes != set(G.nodes()):
            # Some node hangs off the ring(s), so this is not a pure cycle.
            return False
        return G.number_of_edges() == G.number_of_nodes()

    @staticmethod
    def is_complex_cyclic_graph(G: nx.Graph) -> bool:
        """
        Report whether the graph is covered by more than one cycle.

        The graph must be non-empty and connected, its minimum cycle basis must
        contain at least two cycles, and every node must belong to one of them.

        Parameters:
        - G (nx.Graph): The graph to be checked.

        Returns:
        - bool: True if the graph is complex cyclic, False otherwise.

        Raises:
        - TypeError: If G is not a NetworkX Graph.
        """
        GraphDescriptor._validate_graph_input(G)
        if GraphDescriptor.is_graph_empty(G):
            return False
        if not nx.is_connected(G):
            return False

        basis = nx.minimum_cycle_basis(G)
        if len(basis) <= 1:
            return False
        return set().union(*basis) == set(G.nodes())

    @staticmethod
    def check_graph_type(G: nx.Graph) -> str:
        """
        Classify the topology of a graph.

        Parameters:
        - G (nx.Graph): The graph to be checked.

        Returns:
        - str: "Empty Graph", "Acyclic", "Single Cyclic", "Combinatorial Cyclic"
        (when `is_complex_cyclic_graph` holds) or "Complex Cyclic" (the fallback
        for every graph that matches none of the other tests).

        Raises:
        - TypeError: If G is not a NetworkX Graph.
        """
        GraphDescriptor._validate_graph_input(G)
        if GraphDescriptor.is_graph_empty(G):
            return "Empty Graph"

        # Tested in order; the first predicate that holds decides the label.
        classifiers = (
            (GraphDescriptor.is_acyclic_graph, "Acyclic"),
            (GraphDescriptor.is_single_cyclic_graph, "Single Cyclic"),
            (GraphDescriptor.is_complex_cyclic_graph, "Combinatorial Cyclic"),
        )
        for predicate, label in classifiers:
            if predicate(G):
                return label
        return "Complex Cyclic"

    @staticmethod
    def get_cycle_member_rings(G: nx.Graph, type="minimal") -> List[int]:
        """
        Return the sizes of the cycles in a cycle basis of the graph.

        Parameters:
        - G (nx.Graph): The NetworkX graph to be analyzed.
        - type (str): "minimal" selects `nx.minimum_cycle_basis`; any other value
        selects `nx.cycle_basis`.

        Returns:
        - List[int]: Cycle sizes (member rings) sorted in ascending order.

        Raises:
        - TypeError: If G is not a NetworkX Graph.
        """
        if not isinstance(G, nx.Graph):
            raise TypeError("Input must be a networkx Graph object.")

        find_basis = nx.minimum_cycle_basis if type == "minimal" else nx.cycle_basis
        return sorted(len(cycle) for cycle in find_basis(G))

    @staticmethod
    def get_element_count(graph: nx.Graph) -> Dict[str, int]:
        """
        Count how many nodes carry each value of the 'element' attribute.

        Parameters:
        - graph (nx.Graph): A NetworkX graph whose nodes all have an 'element'
        attribute.

        Returns:
        - Dict[str, int]: An OrderedDict of counts, ordered by element key.

        Raises:
        - KeyError: If a node has no 'element' attribute.
        """
        tally = Counter()
        for _, attrs in graph.nodes(data=True):
            tally[attrs["element"]] += 1
        return OrderedDict(sorted(tally.items()))

    @staticmethod
    def get_descriptors(
        entry: Dict,
        reaction_centers: str = "RC",
        its: str = "ITS",
        condensed: bool = True,
    ) -> Dict:
        """
        Add topology, ring, element-count, step and signature descriptors to an
        entry. The entry is modified in place and also returned.

        Keys written: "topo", "cycle", "atom_count", "its_count", "rtype",
        "rstep", "signature_rc", "cycle_its", "rstep_its", "signature_its".

        Parameters:
        - entry (Dict): A dictionary with reaction data.
        - reaction_centers (str): Key for accessing reaction center data.
        - its (str): Key for accessing ITS data.
        - condensed (bool): Accepted for interface compatibility; this method does
        not read it.

        Returns:
        - Dict: The same entry, enhanced when both graphs are available and
        non-empty, otherwise unchanged.
        """
        rc_graph = GraphDescriptor._extract_graph(entry, reaction_centers)
        its_graph = GraphDescriptor._extract_graph(entry, its)

        # A missing (None) or node-less graph is falsy: nothing to describe.
        if not rc_graph or not its_graph:
            return entry

        # Descriptors of the reaction center graph.
        topo = GraphDescriptor.check_graph_type(rc_graph)
        entry["topo"] = topo
        entry["cycle"] = GraphDescriptor.get_cycle_member_rings(rc_graph)
        entry["atom_count"] = GraphDescriptor.get_element_count(rc_graph)
        entry["its_count"] = GraphDescriptor.get_element_count(its_graph)

        # The reaction type follows directly from the topology label.
        is_elementary = topo in ["Single Cyclic", "Acyclic"]
        entry["rtype"] = "Elementary" if is_elementary else "Complicated"

        GraphDescriptor._adjust_cycle_and_step(entry, "cycle", topo)
        entry["signature_rc"] = GraphSignature(rc_graph).create_graph_signature()

        # Same treatment for the ITS graph, stored under ITS-specific keys.
        its_topo = GraphDescriptor.check_graph_type(its_graph)
        entry["cycle_its"] = GraphDescriptor.get_cycle_member_rings(its_graph)
        GraphDescriptor._adjust_cycle_and_step(
            entry, "cycle_its", its_topo, its_prefix="its"
        )
        entry["signature_its"] = GraphSignature(its_graph).create_graph_signature()

        return entry

    @staticmethod
    def _extract_graph(entry: Dict, key: str) -> Union[nx.Graph, None]:
        """
        Fetch the graph stored under `key` in an entry.

        Parameters:
        - entry (Dict): The dictionary containing graph data.
        - key (str): The key for accessing graph data.

        Returns:
        - Union[nx.Graph, None]: The value itself when it is a NetworkX graph, the
        element at index 2 when it is a tuple, and None (after logging an error)
        when the tuple is too short or the value has any other type.
        """
        payload = entry.get(key)
        if isinstance(payload, nx.Graph):
            return payload
        if not isinstance(payload, tuple):
            logger.error(f"Unsupported data type for {key} in entry {entry}")
            return None
        try:
            return payload[2]
        except IndexError:
            logger.error(f"No graph data available at index 2 for entry {entry}")
        return None

    @staticmethod
    def _adjust_cycle_and_step(
        entry: Dict, cycle_key: str, topo_type: str, its_prefix: str = ""
    ) -> None:
        """
        Normalise the cycle list for a topology type and record its length as the
        step count.

        Parameters:
        - entry (Dict): The entry dictionary to update in place.
        - cycle_key (str): The key holding the cycle descriptor.
        - topo_type (str): The topology type; "Acyclic" replaces the cycle list
        with [0] and "Complex Cyclic" prepends 0 to it.
        - its_prefix (str): When non-empty the step is stored under
        "rstep_<its_prefix>", otherwise under "rstep".
        """
        step_key = "rstep" if not its_prefix else f"rstep_{its_prefix}"

        # Make sure the cycle key exists before it is read below.
        entry.setdefault(cycle_key, [])

        if topo_type == "Acyclic":
            entry[cycle_key] = [0]
        elif topo_type == "Complex Cyclic":
            entry[cycle_key] = [0] + entry[cycle_key]

        entry[step_key] = len(entry[cycle_key])

    @staticmethod
    def _validate_graph_input(G: nx.Graph) -> None:
        """
        Ensure the argument is a NetworkX graph.

        Parameters:
        - G (nx.Graph): The object to validate.

        Raises:
        - TypeError: If G is not a NetworkX Graph.
        """
        if isinstance(G, nx.Graph):
            return
        raise TypeError("Input must be a NetworkX Graph object.")

    @staticmethod
    def process_entries_in_parallel(
        entries: List[Dict],
        reaction_centers: str = "RC",
        its: str = "ITS",
        condensed: bool = True,
        n_jobs: int = 4,
        verbose: int = 0,
    ) -> List[Dict]:
        """
        Run `get_descriptors` over a list of entries with joblib.

        Parameters:
        - entries (List[Dict]): List of dictionaries containing reaction data.
        - reaction_centers (str): Key of the reaction center graph data in each
        entry.
        - its (str): Key of the ITS graph data in each entry.
        - condensed (bool): Forwarded unchanged to `get_descriptors`.
        - n_jobs (int): Number of jobs passed to joblib's Parallel.
        - verbose (int): The verbosity level passed to joblib's Parallel.

        Returns:
        - List[Dict]: One result of `get_descriptors` per input entry.
        """
        describe = delayed(GraphDescriptor.get_descriptors)
        tasks = (
            describe(entry, reaction_centers, its, condensed) for entry in entries
        )
        return Parallel(n_jobs=n_jobs, verbose=verbose)(tasks)
304
+
305
+
306
def check_graph_connectivity(graph: nx.Graph) -> str:
    """
    Check the connectivity of a NetworkX graph.

    This function assesses whether all nodes in the graph are connected by some path,
    applicable to undirected graphs.

    Parameters:
    - graph (nx.Graph): A NetworkX graph object.

    Returns:
    - str: 'Connected' if the graph is connected, otherwise 'Disconnected'.

    Raises:
    - NetworkXNotImplemented: If graph is directed and does not support is_connected.
    - NetworkXPointlessConcept: If graph has no nodes (raised by nx.is_connected).
    """
    # The label carries no trailing punctuation, matching both the documented
    # contract and the form of the "Connected" label.
    return "Connected" if nx.is_connected(graph) else "Disconnected"
@@ -0,0 +1,97 @@
1
+ import networkx as nx
2
+ import hashlib
3
+ import numpy as np
4
+
5
+
6
class GraphFP:
    """
    Build fixed-length binary-string fingerprints from simple graph invariants
    (adjacency spectrum, flattened adjacency matrix, degree sequence or triangle
    count), padded with hash-derived bits when the raw encoding is too short.
    """

    def __init__(
        self, graph: "nx.Graph", nBits: int = 1024, hash_alg: str = "sha256"
    ) -> None:
        """
        Initialize the GraphFP class to create binary fingerprints based on various graph
        characteristics.

        Parameters:
        - graph (nx.Graph): Graph on which to perform analysis.
        - nBits (int): Size of the binary fingerprint in bits.
        - hash_alg (str): Name of the `hashlib` constructor used for padding.

        Raises:
        - AttributeError: If `hashlib` has no attribute named `hash_alg`.
        """
        self.graph = graph
        self.nBits = nBits
        self.hash_alg = hash_alg
        self.hash_function = getattr(hashlib, self.hash_alg)

    def fingerprint(self, method: str) -> str:
        """
        Generate a binary string fingerprint of the graph using the specified method.

        Parameters:
        - method (str): The method to use for fingerprinting
        ('spectrum', 'adjacency', 'degree', 'motif')

        Returns:
        - str: A string of length `nBits` that represents the fingerprint of
        the graph.

        Raises:
        - ValueError: If `method` is not one of the supported names.
        """
        if method == "spectrum":
            fp = self._spectrum_fp()
        elif method == "adjacency":
            fp = self._adjacency_fp()
        elif method == "degree":
            fp = self._degree_sequence_fp()
        elif method == "motif":
            fp = self._motif_count_fp()
        else:
            raise ValueError("Unsupported fingerprinting method.")

        # Raw encodings shorter than nBits are topped up with hash-derived bits.
        missing = self.nBits - len(fp)
        if missing > 0:
            fp += self.iterative_deepening(missing)

        return fp[: self.nBits]

    def _dense_adjacency(self) -> np.ndarray:
        # Always hand back a plain 2-D ndarray: depending on the installed
        # networkx/scipy, `.todense()` may yield an ndarray or an np.matrix, and
        # np.matrix never flattens to one dimension.
        return np.asarray(nx.adjacency_matrix(self.graph).todense())

    def _spectrum_fp(self) -> str:
        # Graph spectrum: each eigenvalue of the adjacency matrix contributes the
        # 8-bit (zero-padded) binary form of the integer part of its magnitude.
        eigenvalues = np.linalg.eigvals(self._dense_adjacency())
        sorted_eigenvalues = np.sort(eigenvalues)[: self.nBits]
        eigen_str = "".join(
            bin(int(abs(eig)))[2:].zfill(8) for eig in sorted_eigenvalues
        )
        return eigen_str[: self.nBits]

    def _adjacency_fp(self) -> str:
        # Adjacency matrix flattened row by row, one character group per cell.
        cells = self._dense_adjacency().ravel()
        adj_str = "".join(str(int(x)) for x in cells)
        return adj_str[: self.nBits]

    def _degree_sequence_fp(self) -> str:
        # Degree sequence in descending order, 8 zero-padded bits per degree.
        degrees = sorted((d for _, d in self.graph.degree()), reverse=True)
        degree_str = "".join(bin(d)[2:].zfill(8) for d in degrees)
        return degree_str[: self.nBits]

    def _motif_count_fp(self) -> str:
        # Motif counts: nx.triangles counts each triangle once per corner,
        # hence the division by 3.
        triangles = sum(nx.triangles(self.graph).values()) // 3
        triangle_str = bin(triangles)[2:].zfill(self.nBits)
        return triangle_str[: self.nBits]

    def iterative_deepening(self, remaining_bits: int) -> str:
        """
        Produce padding bits by repeatedly hashing until enough bits are available.

        The padding depends only on the hash algorithm and `remaining_bits`, not
        on the graph.

        Parameters:
        - remaining_bits (int): Number of bits needed to complete the fingerprint
        to `nBits`.

        Returns:
        - str: Exactly `remaining_bits` characters of '0'/'1' (an empty string
        when `remaining_bits` is not positive).
        """
        if remaining_bits <= 0:
            return ""

        hex_digits = ""
        hash_obj = self.hash_function()
        while len(hex_digits) * 4 < remaining_bits:
            hash_obj.update(hex_digits.encode())
            hex_digits += hash_obj.hexdigest()

        # bin() drops leading zero bits; restore them so every hex digit yields
        # four bits and the result can never come out shorter than requested.
        bits = bin(int(hex_digits, 16))[2:].zfill(len(hex_digits) * 4)
        return bits[:remaining_bits]
@@ -0,0 +1,236 @@
1
+ import hashlib
2
+ import networkx as nx
3
+
4
+
5
class GraphSignature:
    """
    Builds canonical string signatures for a graph from two ingredients: a sorted
    list of per-edge descriptions (end-node element/charge labels, bond order and
    optional neighbourhood information) and a Weisfeiler-Lehman style hash of the
    node labels.
    """

    def __init__(self, graph: nx.Graph):
        """
        Stores the graph and validates its node and edge attributes.

        Parameters:
        - graph (nx.Graph): A NetworkX graph instance.

        Raises:
        - ValueError: If the graph fails the checks in `_validate_graph`.
        """
        self.graph = graph
        self._validate_graph()

    def _validate_graph(self):
        """
        Checks that every node has 'element' and 'charge' attributes, that every
        edge has an 'order' that is an int, a float or a tuple of two ints/floats,
        and that an edge 'state', when present, is 'break', 'form' or 'steady'.

        Raises:
        - ValueError: On the first node or edge that violates these rules.
        """
        for node, attrs in self.graph.nodes(data=True):
            if "element" not in attrs:
                raise ValueError(f"Node {node} is missing the 'element' attribute.")
            if "charge" not in attrs:
                raise ValueError(f"Node {node} is missing the 'charge' attribute.")

        for u, v, attrs in self.graph.edges(data=True):
            if "order" not in attrs:
                raise ValueError(f"Edge ({u}, {v}) is missing the 'order' attribute.")

            order = attrs["order"]
            if isinstance(order, tuple):
                is_numeric_pair = len(order) == 2 and all(
                    isinstance(part, (int, float)) for part in order
                )
                if not is_numeric_pair:
                    raise ValueError(
                        f"Edge ({u}, {v}) has an invalid 'order'. It must be a tuple of two ints/floats."
                    )
            elif not isinstance(order, (int, float)):
                raise ValueError(
                    f"Edge ({u}, {v}) has an invalid 'order'. It must be an int, float, or a tuple of two ints/floats."
                )

            # 'state' is optional; an absent state is treated as 'steady'.
            state = attrs.get("state", "steady")
            if state not in {"break", "form", "steady"}:
                raise ValueError(
                    f"Edge ({u}, {v}) has an invalid 'state'. It must be 'break', 'form', or 'steady'."
                )

    def _node_label(self, node) -> str:
        # Element symbol followed by charge; 'X' and 0 stand in for missing values.
        attrs = self.graph.nodes[node]
        return f"{attrs.get('element', 'X')}{attrs.get('charge', 0)}"

    def _direct_neighbor_labels(self, node) -> str:
        # Sorted, concatenated "<element><charge>d<degree>" labels of the direct
        # neighbours of `node`.
        labels = [
            self._node_label(neighbor) + f"d{self.graph.degree(neighbor)}"
            for neighbor in self.graph.neighbors(node)
        ]
        labels.sort()
        return "".join(labels)

    def create_edge_signature(
        self, include_neighbors: bool = False, max_hop: int = 2
    ) -> str:
        """
        Generates a canonical edge signature. Each edge is rendered as
        "<end label><order><end label>" with the two end labels sorted, where an
        end label is the node's element and charge, optionally followed by
        bracketed neighbour information. Bond state is not part of the output.

        Parameters:
        - include_neighbors (bool): Whether to append the direct neighbours'
        element, charge and degree to each end label.
        - max_hop (int): When greater than 1, the labels of nodes reachable within
        this many hops are appended as a second bracketed group.

        Returns:
        - str: The per-edge strings, sorted and joined with "/".
        """
        edge_parts = []

        for u, v, attrs in self.graph.edges(data=True):
            # Bond order, rendered as "{a,b}" for a tuple or "a" for a scalar.
            order = attrs.get("order", (1.0, 1.0))
            if isinstance(order, tuple):
                order_str = f"{{{order[0]:.1f},{order[1]:.1f}}}"
            else:
                order_str = f"{float(order):.1f}"

            end_labels = []
            for endpoint in (u, v):
                label = self._node_label(endpoint)
                if include_neighbors:
                    label = f"{label}[{self._direct_neighbor_labels(endpoint)}]"
                    # NOTE(review): the original indentation was lost in this
                    # view; the k-hop block is assumed to be nested under
                    # `include_neighbors` - confirm against the upstream file.
                    if max_hop > 1:
                        label += f"[{self._get_khop_neighbors(endpoint, max_hop)}]"
                end_labels.append(label)

            # Sorting the end labels removes any dependence on edge direction.
            first, second = sorted(end_labels)
            edge_parts.append(f"{first}{order_str}{second}")

        # Sorting the edges removes any dependence on iteration order.
        edge_parts.sort()
        return "/".join(edge_parts)

    def _get_khop_neighbors(self, node, max_hop):
        """
        Collects the labels of nodes reached by expanding outward from `node`.

        Only nodes already collected are filtered out at each hop; the start node
        is not pre-seeded, so it can be collected again from the second hop on.

        Parameters:
        - node: The node for which to get neighborhood information.
        - max_hop (int): Number of expansion rounds.

        Returns:
        - str: The sorted, concatenated element/charge labels of the collected
        nodes.
        """
        collected = set()
        frontier = [node]
        for _ in range(max_hop):
            reached = set()
            for member in frontier:
                reached.update(self.graph.neighbors(member))
            # Keep only nodes that were not collected in an earlier round.
            frontier = reached - collected
            collected |= frontier

        return "".join(sorted(self._node_label(member) for member in collected))

    def create_wl_hash(self, iterations: int = 3) -> str:
        """
        Generates a Weisfeiler-Lehman (WL) style hash of the graph.

        Node labels start as element plus charge; in each iteration a node's label
        becomes the SHA-256 hex digest of its current label followed by the sorted
        labels of its neighbours. Edge attributes are not used.

        Parameters:
        - iterations (int): Number of WL iterations to perform.

        Returns:
        - str: SHA-256 hex digest of the sorted, concatenated final node labels.

        Raises:
        - KeyError: If a node has no 'element' attribute.
        """

        def digest(text: str) -> str:
            return hashlib.sha256(text.encode()).hexdigest()

        labels = {}
        for node, attrs in self.graph.nodes(data=True):
            labels[node] = f"{attrs['element']}{attrs.get('charge', 0)}"

        for _ in range(iterations):
            # The comprehension reads the previous round's labels throughout and
            # only then rebinds `labels` to the refined ones.
            labels = {
                node: digest(
                    labels[node]
                    + "".join(
                        sorted(
                            labels[neighbor]
                            for neighbor in self.graph.neighbors(node)
                        )
                    )
                )
                for node in self.graph.nodes()
            }

        return digest("".join(sorted(labels.values())))

    def create_graph_signature(
        self,
        include_wl_hash: bool = True,
        include_neighbors: bool = True,
        max_hop: int = 1,
    ) -> str:
        """
        Combines the WL hash (optional) and the edge signature into one string.

        Parameters:
        - include_wl_hash (bool): Whether to prepend the Weisfeiler-Lehman hash.
        - include_neighbors (bool): Whether to include neighbor information in edge
        signatures.
        - max_hop (int): Forwarded to `create_edge_signature`.

        Returns:
        - str: The selected parts joined with "|".
        """
        parts = [self.create_wl_hash()] if include_wl_hash else []
        parts.append(
            self.create_edge_signature(
                include_neighbors=include_neighbors, max_hop=max_hop
            )
        )
        return "|".join(parts)