synkit 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. synkit/Chem/Fingerprint/__init__.py +0 -0
  2. synkit/Chem/Fingerprint/fp_calculator.py +122 -0
  3. synkit/Chem/Fingerprint/smiles_featurizer.py +185 -0
  4. synkit/Chem/Fingerprint/transformation_fp.py +79 -0
  5. synkit/Chem/Molecule/__init__.py +0 -0
  6. synkit/Chem/Molecule/standardize.py +137 -0
  7. synkit/Chem/Reaction/__init__.py +0 -0
  8. synkit/Chem/Reaction/balance_check.py +162 -0
  9. synkit/Chem/Reaction/cleanning.py +59 -0
  10. synkit/Chem/Reaction/deionize.py +289 -0
  11. synkit/Chem/Reaction/neutralize.py +256 -0
  12. synkit/Chem/Reaction/reagent.py +102 -0
  13. synkit/Chem/Reaction/standardize.py +157 -0
  14. synkit/Chem/Reaction/tautomerize.py +168 -0
  15. synkit/Graph/Cluster/__init__.py +0 -0
  16. synkit/Graph/Cluster/morphism.py +83 -0
  17. synkit/Graph/Feature/__init__.py +0 -0
  18. synkit/Graph/Feature/graph_descriptors.py +325 -0
  19. synkit/Graph/Feature/graph_fps.py +97 -0
  20. synkit/Graph/Feature/graph_signature.py +236 -0
  21. synkit/Graph/Feature/hash_fps.py +130 -0
  22. synkit/Graph/Feature/morgan_fps.py +87 -0
  23. synkit/Graph/Feature/path_fps.py +82 -0
  24. synkit/Graph/__init.py +0 -0
  25. synkit/IO/__init__.py +0 -0
  26. synkit/IO/chem_converter.py +231 -0
  27. synkit/IO/data_io.py +277 -0
  28. synkit/IO/data_process.py +49 -0
  29. synkit/IO/debug.py +78 -0
  30. synkit/IO/dg_to_gml.py +124 -0
  31. synkit/IO/gml_to_nx.py +119 -0
  32. synkit/IO/graph_to_mol.py +110 -0
  33. synkit/IO/mol_to_graph.py +282 -0
  34. synkit/IO/nx_to_gml.py +200 -0
  35. synkit/IO/parse_rule.py +172 -0
  36. synkit/IO/smiles_to_id.py +119 -0
  37. synkit/ITS/_misc.py +280 -0
  38. synkit/ITS/aam_validator.py +254 -0
  39. synkit/ITS/its_builder.py +94 -0
  40. synkit/ITS/its_construction.py +213 -0
  41. synkit/ITS/normalize_aam.py +183 -0
  42. synkit/ITS/partial_expand.py +170 -0
  43. synkit/Reactor/__init__.py +0 -0
  44. synkit/Reactor/core_engine.py +164 -0
  45. synkit/Reactor/inference.py +73 -0
  46. synkit/Reactor/multi_step.py +227 -0
  47. synkit/Reactor/multi_step_aam.py +82 -0
  48. synkit/Reactor/reagent.py +95 -0
  49. synkit/Reactor/rule_apply.py +81 -0
  50. synkit/Vis/__init__.py +0 -0
  51. synkit/Vis/chemical_graph_visualizer.py +378 -0
  52. synkit/Vis/chemical_reaction_visualizer.py +133 -0
  53. synkit/Vis/chemical_space.py +83 -0
  54. synkit/Vis/embedding.py +92 -0
  55. synkit/Vis/graph_visualizer.py +286 -0
  56. synkit/Vis/pdf_writer.py +143 -0
  57. synkit/Vis/rsmi_to_fig.py +169 -0
  58. synkit/__init__.py +0 -0
  59. synkit/_misc.py +181 -0
  60. synkit-0.0.1.dist-info/METADATA +148 -0
  61. synkit-0.0.1.dist-info/RECORD +63 -0
  62. synkit-0.0.1.dist-info/WHEEL +4 -0
  63. synkit-0.0.1.dist-info/licenses/LICENSE +21 -0
synkit/IO/data_io.py ADDED
@@ -0,0 +1,277 @@
1
+ import os
2
+ import json
3
+ import pickle
4
+ import numpy as np
5
+ from numpy import ndarray
6
+ from joblib import dump, load
7
+ from typing import List, Dict, Any, Generator
8
+ from synkit.IO.debug import setup_logging
9
+
10
+ logger = setup_logging()
11
+
12
+
13
def save_database(database: list[dict], pathname: str = "./Data/database.json") -> None:
    """
    Save a database (a list of dictionaries) to a JSON file.

    Parameters:
    - database: The database to be saved. Must be a list (or tuple) whose
    items are all dictionaries.
    - pathname: The path where the database will be saved.
    Defaults to './Data/database.json'. The parent directory must exist.

    Raises:
    - TypeError: If the database is not a list of dictionaries.
    - ValueError: If there is an error writing the file. The underlying
    OSError is attached as the exception's cause.
    """
    # Validate the container before touching the file: without this check an
    # empty dict or empty string would pass the per-item test below and be
    # written out as if it were a database.
    if not isinstance(database, (list, tuple)) or not all(
        isinstance(item, dict) for item in database
    ):
        raise TypeError("Database should be a list of dictionaries.")

    try:
        with open(pathname, "w") as f:
            json.dump(database, f)
    except OSError as e:
        # Chain the original error so the OS-level reason stays in the traceback.
        raise ValueError(f"Error writing to file {pathname}: {e}") from e
34
+
35
+
36
def load_database(pathname: str = "./Data/database.json") -> List[Dict]:
    """
    Load a database (a list of dictionaries) from a JSON file.

    Parameters:
    - pathname: The path from where the database will be loaded.
    Defaults to './Data/database.json'.

    Returns:
    - List[Dict]: The loaded database (whatever JSON value the file holds;
    its shape is not validated here).

    Raises:
    - ValueError: If there is an error reading the file. The underlying
    OSError is attached as the exception's cause. Malformed JSON also
    surfaces as a ValueError, because json.JSONDecodeError subclasses it.
    """
    try:
        with open(pathname, "r") as f:
            return json.load(f)
    except OSError as e:
        # Chain the original error so the OS-level reason stays in the traceback.
        raise ValueError(f"Error reading from file {pathname}: {e}") from e
56
+
57
+
58
def save_to_pickle(data: List[Dict[str, Any]], filename: str) -> None:
    """
    Serialize data with pickle and write it to a file.

    Parameters:
    - data (List[Dict[str, Any]]): The list of dictionaries to store.
    - filename (str): Destination path; the file is opened in binary write
    mode, so existing content is replaced.
    """
    with open(filename, mode="wb") as handle:
        pickle.dump(data, handle)
68
+
69
+
70
def load_from_pickle(filename: str) -> List[Any]:
    """
    Read the first pickled object stored in a file.

    Parameters:
    - filename (str): Path of the pickle file to read.

    Returns:
    - List[Any]: The object returned by a single pickle.load call.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(filename, mode="rb") as handle:
        payload = pickle.load(handle)
    return payload
82
+
83
+
84
def load_gml_as_text(gml_file_path):
    """
    Read a GML file and return its content as one string.

    Parameters:
    - gml_file_path (str): The file path to the GML file.

    Returns:
    - str: The text content of the GML file, or None when the file is missing
    or any other error occurs (a message is printed in both cases).
    """
    content = None
    try:
        with open(gml_file_path, "r") as handle:
            content = handle.read()
    except FileNotFoundError:
        print(f"File not found: {gml_file_path}")
    except Exception as err:
        print(f"An error occurred: {err}")
    return content
103
+
104
+
105
def save_text_as_gml(gml_text, file_path):
    """
    Write a GML text string to a file, replacing any existing content.

    Parameters:
    - gml_text (str): The GML content as a text string.
    - file_path (str): The file path where the GML text will be saved.

    Returns:
    - bool: True if saving was successful, False otherwise (the error is
    printed, not raised).
    """
    try:
        with open(file_path, "w") as handle:
            handle.write(gml_text)
        print(f"GML text successfully saved to {file_path}")
        saved = True
    except Exception as err:
        print(f"An error occurred while saving the GML text: {err}")
        saved = False
    return saved
124
+
125
+
126
def save_compressed(array: ndarray, filename: str) -> None:
    """
    Store a NumPy array in a compressed .npz archive under the key 'array'.

    Parameters:
    - array (ndarray): The NumPy array to be saved.
    - filename (str): The file path or name to save the array to,
    with a '.npz' extension.

    Returns:
    - None: This function does not return any value.
    """
    named_arrays = {"array": array}
    np.savez_compressed(filename, **named_arrays)
139
+
140
+
141
def load_compressed(filename: str) -> ndarray:
    """
    Read the array stored under the key 'array' in a compressed .npz file.

    Parameters:
    - filename (str): The path of the .npz file to load.

    Returns:
    - ndarray: The loaded NumPy array.

    Raises:
    - KeyError: If the .npz file does not contain an array with the key 'array'.
    """
    with np.load(filename) as archive:
        # Guard first: fail loudly when the expected entry is absent.
        if "array" not in archive:
            raise KeyError(
                "The .npz file does not contain" + " an array with the key 'array'."
            )
        return archive["array"]
161
+
162
+
163
def save_model(model: Any, filename: str) -> None:
    """
    Persist a machine learning model with joblib and log the destination.

    Parameters:
    - model (Any): The machine learning model to save.
    - filename (str): The path to the file where the model will be saved.
    """
    dump(model, filename)
    message = f"Model saved successfully to {filename}"
    logger.info(message)
173
+
174
+
175
def load_model(filename: str) -> Any:
    """
    Restore a machine learning model from a joblib file and log the source.

    Parameters:
    - filename (str): The path to the file from which the model will be loaded.

    Returns:
    - Any: The loaded machine learning model.
    """
    restored = load(filename)
    message = f"Model loaded successfully from {filename}"
    logger.info(message)
    return restored
188
+
189
+
190
def save_dict_to_json(data: dict, file_path: str) -> None:
    """
    Write a dictionary to a JSON file using a 4-space indent.

    Parameters:
    -----------
    data : dict
        The dictionary to be saved.

    file_path : str
        The path to the file where the dictionary should be saved.
        Make sure the file has a .json extension.

    Returns:
    --------
    None
    """
    with open(file_path, mode="w") as handle:
        json.dump(data, handle, indent=4)

    # Logged only after the file has been written and closed.
    message = f"Dictionary successfully saved to {file_path}"
    logger.info(message)
211
+
212
+
213
def load_dict_from_json(file_path: str) -> dict:
    """
    Read a dictionary from a JSON file.

    Parameters:
    -----------
    file_path : str
        The path to the JSON file from which to load the dictionary.
        Make sure the file has a .json extension.

    Returns:
    --------
    dict
        The dictionary loaded from the JSON file. If anything goes wrong
        (missing file, invalid JSON, ...) the error is logged and None is
        returned instead.
    """
    try:
        with open(file_path, mode="r") as handle:
            loaded = json.load(handle)
        logger.info(f"Dictionary successfully loaded from {file_path}")
    except Exception as err:
        # Best-effort loader: report the problem and signal failure with None.
        logger.error(err)
        return None
    return loaded
236
+
237
+
238
def load_from_pickle_generator(file_path: str) -> Generator[Any, None, None]:
    """
    A generator that yields items from a pickle file that contains one or more
    consecutively pickled batches, where each batch is an iterable of items
    (typically a list of dictionaries).

    Parameters:
    - file_path (str): The path to the pickle file to load.

    Yields:
    - Any: One item at a time, batch after batch, until the end of the file.
    """
    # NOTE: unpickling can execute arbitrary code; only read trusted files.
    with open(file_path, "rb") as file:
        while True:
            # Only the load itself may signal end-of-stream. Keeping the yield
            # outside the try ensures an EOFError raised elsewhere (e.g. thrown
            # into the generator) is not mistaken for a clean end of file.
            try:
                batch_items = pickle.load(file)
            except EOFError:
                return
            yield from batch_items
257
+
258
+
259
def collect_data(num_batches: int, temp_dir: str, file_template: str) -> List[Any]:
    """
    Gather the items of several batch pickle files into one flat list.

    Parameters:
    - num_batches (int): The number of batch files to process; indices
    0 .. num_batches - 1 are used.
    - temp_dir (str): The directory where the batch files are stored.
    - file_template (str): The template string for batch file names, expecting an integer
    formatter (it is filled via str.format with the batch index).

    Returns:
    List[Any]: A list of aggregated data items from all batch files, in batch order.
    """
    gathered: List[Any] = []
    for batch_index in range(num_batches):
        batch_name = file_template.format(batch_index)
        batch_path = os.path.join(temp_dir, batch_name)
        gathered.extend(load_from_pickle_generator(batch_path))
    return gathered
@@ -0,0 +1,49 @@
1
+ from typing import List, Dict, Any
2
+
3
+
4
def merge_dicts(
    list1: List[Dict[str, Any]],
    list2: List[Dict[str, Any]],
    key: str,
    intersection: bool = True,
) -> List[Dict[str, Any]]:
    """
    Merges two lists of dictionaries based on a specified key, with an option to
    either merge only dictionaries with matching key values (intersection) or
    all dictionaries (union).

    When both lists hold an entry for the same key value, fields from the
    list2 entry override fields of the same name from the list1 entry. If a
    list contains several entries with the same key value, the last one is the
    one used for lookups.

    Parameters:
    - list1 (List[Dict[str, Any]]): The first list of dictionaries.
    - list2 (List[Dict[str, Any]]): The second list of dictionaries.
    - key (str): The key used to match and merge dictionaries from both lists.
    - intersection (bool): If True, only merge dictionaries with matching key values;
    if False, merge all dictionaries, combining those with matching key values.

    Returns:
    - List[Dict[str, Any]]: A list of dictionaries with merged contents from both
    input lists according to the specified merging strategy. Intersection
    results follow the order of list1. Union results list the key values of
    list1 first (in order of first appearance), followed by the key values
    that occur only in list2.

    Raises:
    - KeyError: If any dictionary in either list lacks `key`.
    """
    dict1 = {item[key]: item for item in list1}
    dict2 = {item[key]: item for item in list2}

    if intersection:
        # Intersection of keys: only entries of list1 with a partner in list2.
        return [
            {**item1, **dict2[item1[key]]} for item1 in list1 if item1[key] in dict2
        ]

    # Union of keys. Walk the dicts (insertion-ordered) instead of a set of
    # keys so the output order is deterministic from run to run.
    merged_dict: Dict[Any, Dict[str, Any]] = {}
    for k, item1 in dict1.items():
        merged_dict[k] = {**item1, **dict2[k]} if k in dict2 else item1
    for k, item2 in dict2.items():
        merged_dict.setdefault(k, item2)
    return list(merged_dict.values())
synkit/IO/debug.py ADDED
@@ -0,0 +1,78 @@
1
+ import os
2
+ import logging
3
+ import warnings
4
+ from rdkit import rdBase
5
+
6
+
7
def setup_logging(log_level: str = "INFO", log_filename: str = None) -> logging.Logger:
    """
    Configures logging to either the console or a file based on provided parameters.

    The root logger is reconfigured: all handlers currently attached to it are
    removed before the new configuration is applied.

    Parameters
    ----------
    log_level : str, optional
        Logging level to set. Defaults to 'INFO'. Options include 'DEBUG', 'INFO',
        'WARNING', 'ERROR', 'CRITICAL'. Case-insensitive.
    log_filename : str, optional
        If provided, logs are appended to this file; its parent directory is
        created when missing. Defaults to None, which logs to console.

    Returns
    -------
    logging.Logger
        Configured logger instance (the root logger).

    Raises
    ------
    ValueError
        If an invalid log level is provided.
    """
    log_format = "%(asctime)s - %(levelname)s - %(message)s"
    numeric_level = getattr(logging, log_level.upper(), None)

    if not isinstance(numeric_level, int):
        raise ValueError(f"Invalid log level: {log_level}")

    logger = logging.getLogger()
    # basicConfig is a no-op while the root logger has handlers, so drop them.
    logger.handlers.clear()

    if log_filename:
        # A bare file name has an empty dirname, and os.makedirs("") raises
        # FileNotFoundError; only create the directory when there is one.
        log_dir = os.path.dirname(log_filename)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        logging.basicConfig(
            level=numeric_level, format=log_format, filename=log_filename, filemode="a"
        )
    else:
        logging.basicConfig(level=numeric_level, format=log_format)

    return logger
48
+
49
+
50
def configure_warnings_and_logs(
    ignore_warnings: bool = False, disable_rdkit_logs: bool = False
) -> None:
    """
    Sets up Python warning filters and RDKit log channels from two flags.

    Parameters
    ----------
    ignore_warnings : bool, optional
        Whether to suppress Python warnings. When False the warning filters
        are reset instead. Default is False.
    disable_rdkit_logs : bool, optional
        Whether to disable RDKit error and warning logs. When False both
        channels are enabled. Default is False.

    Usage
    -----
    This function is useful for controlling verbosity in production or testing, but
    should be used cautiously during development to avoid missing critical issues.
    """
    if not ignore_warnings:
        warnings.resetwarnings()
    else:
        warnings.filterwarnings("ignore")

    # Same two channels either way; only the direction of the switch differs.
    switch = rdBase.DisableLog if disable_rdkit_logs else rdBase.EnableLog
    for channel in ("rdApp.error", "rdApp.warning"):
        switch(channel)
synkit/IO/dg_to_gml.py ADDED
@@ -0,0 +1,124 @@
1
+ import regex
2
+ from synkit.IO.debug import setup_logging
3
+ from synkit.Chem.Reaction.standardize import Standardize
4
+ from mod import DGVertexMapper, smiles, Rule
5
+
6
+ logger = setup_logging()
7
+
8
+
9
class DGToGML:
    """
    Helpers that turn the edges of a derivation graph into atom-mapped reaction
    SMILES and convert a chosen reaction SMILES into a GML rule.

    NOTE(review): relies on the `mod` package (DGVertexMapper, smiles, Rule);
    the behavior of those objects is not visible in this file.
    """

    def __init__(self) -> None:
        # Used by `fit` to bring reaction SMILES into a comparable form.
        self.standardizer = Standardize()

    @staticmethod
    def getReactionSmiles(dg):
        """
        Build atom-mapped reaction SMILES strings for every edge of `dg`.

        Parameters:
        - dg: Object exposing `vertices` and `edges` (presumably a mod
        derivation graph - confirm against the caller).

        Returns:
        - dict: Maps each edge of `dg` to a sorted list of the distinct
        reaction SMILES ("left>>right") generated for it.

        Raises:
        - AssertionError: If a temporary ':o<id>]' marker is left in a result.
        """
        # Tag every id in the per-graph SMILES with an 'o' (":12]" -> ":o12]")
        # so ids can later be replaced one by one without clashing with ids
        # that have already been rewritten.
        origSmiles = {}
        for v in dg.vertices:
            s = v.graph.smilesWithIds
            s = regex.sub(":([0-9]+)]", ":o\\1]", s)
            origSmiles[v.graph] = s

        res = {}
        for e in dg.edges:
            vms = DGVertexMapper(e, rightLimit=1, leftLimit=1)
            eductSmiles = [origSmiles[g] for g in vms.left]

            # Educt side: replace each tagged id with the id of the vertex in
            # the combined left-hand graph.
            for ev in vms.left.vertices:
                s = eductSmiles[ev.graphIndex]
                s = s.replace(f":o{ev.vertex.id}]", f":{ev.id}]")
                eductSmiles[ev.graphIndex] = s

            strs = set()
            for vm in DGVertexMapper(e, rightLimit=1, leftLimit=1):
                productSmiles = [origSmiles[g] for g in vms.right]
                # Product atoms that are the image of an educt atom take over
                # that educt atom's id.
                for ev in vms.left.vertices:
                    pv = vm.map[ev]
                    if not pv:
                        continue
                    s = productSmiles[pv.graphIndex]
                    s = s.replace(f":o{pv.vertex.id}]", f":{ev.id}]")
                    productSmiles[pv.graphIndex] = s
                # Product atoms without a preimage get fresh ids, numbered
                # upwards from the number of left-hand vertices.
                count = vms.left.numVertices
                for pv in vms.right.vertices:
                    ev = vm.map.inverse(pv)
                    if ev:
                        continue
                    s = productSmiles[pv.graphIndex]
                    s = s.replace(f":o{pv.vertex.id}]", f":{count}]")
                    count += 1
                    productSmiles[pv.graphIndex] = s
                left = ".".join(eductSmiles)
                right = ".".join(productSmiles)
                s = f"{left}>>{right}"
                # Every temporary marker must have been replaced by now.
                assert ":o" not in s
                strs.add(s)
            res[e] = sorted(strs)
        return res

    @staticmethod
    def parseReactionSmiles(line: str) -> tuple[str, Rule]:
        """
        Convert a reaction SMILES into a GML rule.

        Parameters:
        - line (str): Reaction SMILES of the form "left>>right"; molecules on
        each side are separated by '.'.

        Returns:
        - tuple[str, Rule]: The generated GML rule text and the Rule object
        created from it via Rule.fromGMLString.

        Raises:
        - ValueError: If `line` does not contain exactly one '>>' separator.
        - AssertionError: If a graph vertex has no external id.
        """
        sLeft, sRight = line.split(">>")
        ssLeft = sLeft.split(".")
        ssRight = sRight.split(".")
        mLeft = [smiles(s, add=False) for s in ssLeft]
        mRight = [smiles(s, add=False) for s in ssRight]

        def printGraph(g):
            # Emit GML node/edge lines for one graph, using external ids as
            # node ids.
            extFromInt = {}
            for iExt in range(g.minExternalId, g.maxExternalId + 1):
                v = g.getVertexFromExternalId(iExt)
                if not v.isNull():
                    extFromInt[v] = iExt
            s = ""
            for v in g.vertices:
                assert v in extFromInt
                s += '\t\tnode [ id %d label "%s" ]\n' % (extFromInt[v], v.stringLabel)
            for e in g.edges:
                s += '\t\tedge [ source %d target %d label "%s" ]\n' % (
                    extFromInt[e.source],
                    extFromInt[e.target],
                    e.stringLabel,
                )
            return s

        s = "rule [\n\tleft [\n"
        for m in mLeft:
            s += printGraph(m)
        s += "\t]\n\tright [\n"
        for m in mRight:
            s += printGraph(m)
        s += "\t]\n]\n"
        return s, Rule.fromGMLString(s, add=False)

    def fit(self, dg, origSmiles):
        """
        Matches the original SMILES to a list of generated reaction SMILES and
        returns the parsed reaction.

        Parameters:
        - dg: The graph whose edges provide the candidate reactions; passed
        unchanged to `getReactionSmiles`.
        - origSmiles (str): The original reaction SMILES string to match.

        Returns:
        - The (GML text, Rule) tuple from `parseReactionSmiles` for the first
        candidate whose standardized form equals the standardized
        `origSmiles`; None if nothing matches or if any error occurs (the
        error is logged).
        """
        try:
            res = DGToGML.getReactionSmiles(dg)
            smiles_list = [value for values in res.values() for value in values]

            smiles_standard = [
                self.standardizer.fit(rsmi, True, True) for rsmi in smiles_list
            ]
            origSmiles_standard = self.standardizer.fit(origSmiles, True, True)

            for index, value in enumerate(smiles_standard):
                if value == origSmiles_standard:
                    return self.parseReactionSmiles(smiles_list[index])

            return None
        except Exception as e:
            logger.error(f"An error occurred: {e}")
            return None
synkit/IO/gml_to_nx.py ADDED
@@ -0,0 +1,119 @@
1
+ import re
2
+ import networkx as nx
3
+ from typing import Tuple
4
+ from synkit.ITS.its_construction import ITSConstruction
5
+
6
+
7
class GMLToNX:
    """
    Parser that turns GML-like rule text (sections 'left', 'context', 'right')
    into NetworkX graphs.
    """

    def __init__(self, gml_text: str):
        """
        Initializes a GMLToNX object that can parse GML-like text into separate
        NetworkX graphs representing different stages or components of a chemical reaction.

        Parameters:
        - gml_text (str): The rule text to parse.
        """
        self.gml_text = gml_text
        # One graph per rule section; filled in by `transform`.
        self.graphs = {"left": nx.Graph(), "context": nx.Graph(), "right": nx.Graph()}

    def _parse_element(self, line: str, current_section: str):
        """
        Parses a line of GML-like text to extract node or edge data and adds it to the
        current section's graph.

        Parameters:
        - line (str): A single 'node [ ... ]' or 'edge [ ... ]' line. Fields are
        located by whitespace splitting, so labels must not contain spaces.
        - current_section (str): Key into `self.graphs` ('left', 'context' or
        'right').

        Nodes receive the attributes 'element', 'charge' and 'atom_map' (the
        node id). Edges receive 'order', mapped from the bond label
        ('-' 1, ':' 1.5, '=' 2, '#' 3; anything else 0).
        """
        label_to_order = {"-": 1, ":": 1.5, "=": 2, "#": 3}
        tokens = line.split()

        if "node" in line:
            node_id = int(tokens[tokens.index("id") + 1])
            label = tokens[tokens.index("label") + 1].strip('"')
            element, charge = self._extract_element_and_charge(label)
            node_attributes = {
                "element": element,
                "charge": charge,
                "atom_map": node_id,
            }
            self.graphs[current_section].add_node(node_id, **node_attributes)

        elif "edge" in line:
            source = int(tokens[tokens.index("source") + 1])
            target = int(tokens[tokens.index("target") + 1])
            label = tokens[tokens.index("label") + 1].strip('"')
            order = label_to_order.get(label, 0)
            self.graphs[current_section].add_edge(source, target, order=order)

    def _extract_element_and_charge(self, label: str) -> Tuple[str, int]:
        """
        Extracts the chemical element and its charge from a node label.

        Parameters:
        - label (str): Label such as 'C', 'O-', 'N+' or 'Fe2+'.

        Returns:
        - Tuple[str, int]: (element, charge). Labels that do not match the
        expected pattern yield ('X', 0). A number without a trailing sign is
        ignored and the charge stays 0.
        """
        match = re.match(r"([A-Za-z*]+)(\d+)?([+-])?$", label)
        if not match:
            return ("X", 0)
        element = match.group(1)
        num = match.group(2)
        sign = match.group(3)
        charge = 0
        if sign:
            # A bare sign means a single charge unit.
            charge_val = int(num) if num else 1
            charge = charge_val if sign == "+" else -charge_val
        return element, charge

    def _synchronize_nodes_and_edges(self):
        """
        Ensures that all nodes and edges in 'context' appear in both 'left' and 'right'.
        We do not remove edges from left or right if they are not in context.
        We only add missing context nodes and edges to left and right.
        For nodes that already exist, the context attributes overwrite theirs.
        """
        context = self.graphs["context"]
        for side in ("left", "right"):
            graph = self.graphs[side]
            for node, ndata in context.nodes(data=True):
                if node not in graph:
                    graph.add_node(node, **ndata)
                else:
                    # Merge attributes if the node already exists on this side.
                    graph.nodes[node].update(ndata)
            for s, t, edata in context.edges(data=True):
                if not graph.has_edge(s, t):
                    graph.add_edge(s, t, **edata)

    def transform(self) -> Tuple[nx.Graph, nx.Graph, nx.Graph]:
        """
        Transforms the GML-like text into three NetworkX graphs.

        Parsed elements are added to the graphs created in `__init__`, and the
        'context' entry is replaced at the end, so this is meant to be called
        once per instance.

        Returns:
        - Tuple[nx.Graph, nx.Graph, nx.Graph]: (left, right, context), where
        left and right include the context nodes and edges, and the third
        element is the ITS graph built from left and right with the left
        graph's node attributes copied onto it.

        Raises:
        - KeyError: If a node or edge line appears before any section header.
        """
        current_section = None
        for raw_line in self.gml_text.split("\n"):
            line = raw_line.strip()
            if line.startswith("rule") or line == "]":
                continue
            # A section header is '<name> [' with <name> one of the known
            # sections. Compare the header token exactly, so a node or edge
            # line that merely contains such a word is not taken for a header.
            header = line.split("[")[0].strip()
            if header in self.graphs:
                current_section = header
                continue
            if line.startswith("node") or line.startswith("edge"):
                self._parse_element(line, current_section)

        # Synchronize after parsing
        self._synchronize_nodes_and_edges()

        # Build the ITS graph once and keep the copy whose node attributes were
        # restored from the left graph (nodes absent from left stay untouched).
        left = self.graphs["left"]
        its_graph = ITSConstruction.ITSGraph(left, self.graphs["right"])
        for n in its_graph.nodes():
            if n in left.nodes:
                its_graph.nodes[n].update(left.nodes[n])
        self.graphs["context"] = its_graph

        return self.graphs["left"], self.graphs["right"], self.graphs["context"]