yu-mcal 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcal/__init__.py +1 -0
- mcal/calculations/__init__.py +0 -0
- mcal/calculations/hopping_mobility_model.py +391 -0
- mcal/calculations/rcal.py +408 -0
- mcal/constants/element_properties.csv +121 -0
- mcal/mcal.py +844 -0
- mcal/utils/__init__.py +0 -0
- mcal/utils/cif_reader.py +645 -0
- mcal/utils/gaus_log_reader.py +91 -0
- mcal/utils/gjf_maker.py +267 -0
- yu_mcal-0.1.4.dist-info/METADATA +263 -0
- yu_mcal-0.1.4.dist-info/RECORD +15 -0
- yu_mcal-0.1.4.dist-info/WHEEL +4 -0
- yu_mcal-0.1.4.dist-info/entry_points.txt +2 -0
- yu_mcal-0.1.4.dist-info/licenses/LICENSE +21 -0
mcal/utils/__init__.py
ADDED
|
File without changes
|
mcal/utils/cif_reader.py
ADDED
|
@@ -0,0 +1,645 @@
|
|
|
1
|
+
"""CifReader beta (2025/10/30)"""
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
from itertools import product
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, List, Literal, Tuple
|
|
7
|
+
import warnings
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from numpy.typing import NDArray
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class CifReader:
    """Read a CIF file and extract crystal information.

    Raises
    ------
    ElementPropertiesIsNotDefinedError
        Raised when the properties of an element are not defined.
    SymmetryIsNotDefinedError
        Raised when no symmetry operation is defined.
    ZValueIsNotMatchError
        Raised when the calculated Z value does not match the Z value read from the CIF file.
        The atomic bond detection may not be functioning correctly.
    """
    # Parent of the directory that contains this file; 'constants/' is looked up below it.
    parent_dir = Path(os.path.abspath(__file__)).parent.parent
    # Element table, loaded once when the class body is executed.
    # Rows that contain a missing value are dropped.
    ELEMENT_PROP = pd.read_csv(f'{parent_dir}/constants/element_properties.csv').dropna(axis=0)
    # Lookup tables keyed by the 'symbol' column of the element table.
    ATOMIC_WEIGHTS = ELEMENT_PROP[['symbol', 'weight']].set_index('symbol').to_dict()['weight']
    COVALENT_RADII = ELEMENT_PROP[['symbol', 'covalent_radius']].set_index('symbol').to_dict()['covalent_radius']
|
|
33
|
+
|
|
34
|
+
def __init__(self, cif_path: str) -> None:
    """Initialize the CifReader class.

    The CIF file is read and the whole processing pipeline is run immediately,
    so a constructed instance already holds the unique molecules of the unit cell.

    Parameters
    ----------
    cif_path : str
        Path of cif file.

    Raises
    ------
    ZValueIsNotMatchError
        Raised when a reference Z value was read from the file (i.e. it is not 0)
        and the calculated Z value differs from it.
    """
    # Name taken from the 'data_' line of the CIF file.
    self.basename = None

    # Crystal properties
    self.cell_lengths = [None, None, None]
    self.cell_angles = [None, None, None]
    self.lattice = None
    self.symmetry_pos = []
    self.z_value = 0
    # Z value declared in the CIF file; 0 means "not given", which disables the check below.
    self._ref_z_value = 0

    # Molecule properties
    self.symbols = []
    self.symbols_label = []
    self.coordinates = []
    # Atoms after the symmetry operations have been applied.
    self.sym_symbols = []
    self.sym_coords = np.empty((0, 3))

    # Unique molecule
    self.unique_symbols = {}
    self.unique_coords = {}

    # First pass: parse the file, expand by symmetry, drop duplicated atoms
    # and group the atoms into bonded molecules.
    self._reader(cif_path)
    self._calc_lattice()
    self._operate_sym()
    self.sym_symbols, self.sym_coords = self.remove_duplicates(self.sym_symbols, self.sym_coords)
    self._make_adjacency_mat()
    self._split_mols()
    # Second pass: after the molecules have been moved into the unit cell the
    # duplicate removal and the bond detection are repeated on the moved atoms.
    self._put_unit_cell()
    self.sym_symbols, self.sym_coords = self.remove_duplicates(self.sym_symbols, self.sym_coords)
    self._make_adjacency_mat()
    self._split_mols()
    self._calc_z_value()

    if self._ref_z_value != 0 and self.z_value != self._ref_z_value:
        raise ZValueIsNotMatchError('Z value is not match.')
|
|
77
|
+
|
|
78
|
+
def _calc_lattice(self):
|
|
79
|
+
"""Calculate lattice."""
|
|
80
|
+
a, b, c = self.cell_lengths
|
|
81
|
+
alpha, beta, gamma = tuple(map(lambda x: np.radians(x), self.cell_angles))
|
|
82
|
+
|
|
83
|
+
b_x = b * np.cos(gamma)
|
|
84
|
+
b_y = b * np.sin(gamma)
|
|
85
|
+
c_x = c * np.cos(beta)
|
|
86
|
+
v = ((np.cos(alpha) - np.cos(beta) * np.cos(gamma))) / np.sin(gamma)
|
|
87
|
+
c_y = c * v
|
|
88
|
+
c_z = c * np.sqrt(1 - np.cos(beta)**2 - v**2)
|
|
89
|
+
|
|
90
|
+
self.lattice = np.array((
|
|
91
|
+
(a, 0, 0),
|
|
92
|
+
(b_x, b_y, 0),
|
|
93
|
+
(c_x, c_y, c_z),
|
|
94
|
+
))
|
|
95
|
+
|
|
96
|
+
def _calc_z_value(self):
|
|
97
|
+
"""Calculate z value."""
|
|
98
|
+
for atom_idx in self.bonded_atoms:
|
|
99
|
+
cen_of_weight = self.calc_cen_of_weight(self.sym_coords[atom_idx])
|
|
100
|
+
|
|
101
|
+
if self._is_in_unit_cell(cen_of_weight):
|
|
102
|
+
self.unique_symbols[self.z_value] = self.sym_symbols[atom_idx]
|
|
103
|
+
self.unique_coords[self.z_value] = self.sym_coords[atom_idx]
|
|
104
|
+
self.z_value += 1
|
|
105
|
+
|
|
106
|
+
def _is_in_unit_cell(self, cen_of_weight: NDArray[np.float64]) -> bool:
|
|
107
|
+
"""Determine if the center of weight is in the unit cell.
|
|
108
|
+
|
|
109
|
+
Parameters
|
|
110
|
+
----------
|
|
111
|
+
cen_of_weight : NDArray[np.float64]
|
|
112
|
+
Center of weight.
|
|
113
|
+
|
|
114
|
+
Returns
|
|
115
|
+
-------
|
|
116
|
+
bool
|
|
117
|
+
True if the center of weight is in the unit cell.
|
|
118
|
+
"""
|
|
119
|
+
if np.all(0 <= cen_of_weight) and np.all(cen_of_weight < 1):
|
|
120
|
+
is_in_unit_cell = True
|
|
121
|
+
else:
|
|
122
|
+
is_in_unit_cell = False
|
|
123
|
+
|
|
124
|
+
return is_in_unit_cell
|
|
125
|
+
|
|
126
|
+
def _make_adjacency_mat(self):
|
|
127
|
+
"""Determine bonding and create the adjacency matrix."""
|
|
128
|
+
num_atoms = len(self.sym_symbols)
|
|
129
|
+
self.adjacency_mat = np.zeros((num_atoms, num_atoms), dtype=bool)
|
|
130
|
+
|
|
131
|
+
self.cart_coords = np.dot(self.sym_coords, self.lattice)
|
|
132
|
+
|
|
133
|
+
try:
|
|
134
|
+
covalent_distance = np.array([self.COVALENT_RADII[symbol] for symbol in self.sym_symbols]) \
|
|
135
|
+
+ np.array([self.COVALENT_RADII[symbol] for symbol in self.sym_symbols])[:, np.newaxis]
|
|
136
|
+
except KeyError:
|
|
137
|
+
raise ElementPropertiesIsNotDefinedError('Element properties is not defined.')
|
|
138
|
+
|
|
139
|
+
distance = np.linalg.norm(self.cart_coords[:, np.newaxis, :] - self.cart_coords[np.newaxis, :, :], axis=-1)
|
|
140
|
+
self.adjacency_mat[(distance <= covalent_distance * 1.3) & (distance != 0)] = 1
|
|
141
|
+
|
|
142
|
+
def _operate_sym(self) -> None:
|
|
143
|
+
"""Perform molecular symmetry operations."""
|
|
144
|
+
|
|
145
|
+
def _extract_coord(coord: NDArray[np.float64], idx: int, is_minus: bool) -> NDArray[np.float64]:
|
|
146
|
+
"""Extract coordinates from the coordinate array.
|
|
147
|
+
|
|
148
|
+
Parameters
|
|
149
|
+
----------
|
|
150
|
+
coord : NDArray[np.float64]
|
|
151
|
+
Coordinate array.
|
|
152
|
+
idx : int
|
|
153
|
+
Index of the coordinate to extract.
|
|
154
|
+
is_minus : bool
|
|
155
|
+
If True, the coordinate is extracted with a minus sign.
|
|
156
|
+
|
|
157
|
+
Returns
|
|
158
|
+
-------
|
|
159
|
+
NDArray[np.float64]
|
|
160
|
+
Extracted coordinate array.
|
|
161
|
+
"""
|
|
162
|
+
if is_minus:
|
|
163
|
+
return -coord[:, idx].copy()
|
|
164
|
+
else:
|
|
165
|
+
return coord[:, idx].copy()
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
if len(self.symmetry_pos) == 0:
|
|
169
|
+
raise SymmetryIsNotDefinedError('Symmetry is not defined.')
|
|
170
|
+
|
|
171
|
+
self.sym_symbols = np.tile(self.symbols, len(self.symmetry_pos))
|
|
172
|
+
|
|
173
|
+
idx_fil = ('x', 'y', 'z')
|
|
174
|
+
|
|
175
|
+
for pos in self.symmetry_pos:
|
|
176
|
+
moved_coord = np.zeros(self.coordinates.shape)
|
|
177
|
+
|
|
178
|
+
sum_array = np.empty(0)
|
|
179
|
+
|
|
180
|
+
for i, s, in enumerate(pos.split(',')):
|
|
181
|
+
matches = re.findall(r'[0-9]/[0-9]', s)
|
|
182
|
+
if matches:
|
|
183
|
+
fraction = eval(f'float({matches[0]})')
|
|
184
|
+
else:
|
|
185
|
+
fraction = 0
|
|
186
|
+
sum_array = np.append(sum_array, fraction)
|
|
187
|
+
|
|
188
|
+
terms = [x.replace('+', '') for x in re.findall(r'\+?\-?[x-z]', s)]
|
|
189
|
+
|
|
190
|
+
for term in terms:
|
|
191
|
+
is_minus = False
|
|
192
|
+
|
|
193
|
+
if '-' in term:
|
|
194
|
+
is_minus = True
|
|
195
|
+
term = term[-1]
|
|
196
|
+
|
|
197
|
+
moved_coord[:, i] += _extract_coord(
|
|
198
|
+
self.coordinates,
|
|
199
|
+
idx_fil.index(term),
|
|
200
|
+
is_minus
|
|
201
|
+
)
|
|
202
|
+
moved_coord = sum_array + moved_coord
|
|
203
|
+
self.sym_coords = np.append(self.sym_coords, moved_coord, axis=0)
|
|
204
|
+
|
|
205
|
+
def _put_unit_cell(self) -> None:
|
|
206
|
+
"""Put molecules into unit cell."""
|
|
207
|
+
for atom_idx in self.bonded_atoms:
|
|
208
|
+
for i, c in enumerate(self.calc_cen_of_weight(self.sym_coords[atom_idx])):
|
|
209
|
+
if 1 <= c:
|
|
210
|
+
change = -int(c)
|
|
211
|
+
elif c < 0:
|
|
212
|
+
change = abs(int(c)) + 1
|
|
213
|
+
else:
|
|
214
|
+
change = 0
|
|
215
|
+
self.sym_coords[atom_idx, i] += change
|
|
216
|
+
|
|
217
|
+
def _reader(self, cif_path: str) -> None:
|
|
218
|
+
"""Read cif file infomation.
|
|
219
|
+
|
|
220
|
+
Parameters
|
|
221
|
+
----------
|
|
222
|
+
cif_path : str
|
|
223
|
+
Path of cif file.
|
|
224
|
+
"""
|
|
225
|
+
# save index position
|
|
226
|
+
counter = 0
|
|
227
|
+
atom_data_index = {
|
|
228
|
+
'_atom_site_label': None,
|
|
229
|
+
'_atom_site_type_symbol': None,
|
|
230
|
+
'_atom_site_fract_x': None,
|
|
231
|
+
'_atom_site_fract_y': None,
|
|
232
|
+
'_atom_site_fract_z': None,
|
|
233
|
+
}
|
|
234
|
+
symmetry_data_index = None
|
|
235
|
+
|
|
236
|
+
is_read_atom = False
|
|
237
|
+
is_read_sym = False
|
|
238
|
+
|
|
239
|
+
with open(cif_path) as f:
|
|
240
|
+
while True:
|
|
241
|
+
line = f.readline()
|
|
242
|
+
if not line:
|
|
243
|
+
break
|
|
244
|
+
line = line.strip()
|
|
245
|
+
|
|
246
|
+
# remove blank characters
|
|
247
|
+
if not line:
|
|
248
|
+
continue
|
|
249
|
+
|
|
250
|
+
if line.startswith('data_'):
|
|
251
|
+
self.basename = '_'.join(line.split('_')[1:])
|
|
252
|
+
|
|
253
|
+
# get unit cell information
|
|
254
|
+
cell_params = ('_cell_length_a', '_cell_length_b', '_cell_length_c', '_cell_angle_alpha', '_cell_angle_beta', '_cell_angle_gamma')
|
|
255
|
+
if line.startswith(tuple(cell_params)):
|
|
256
|
+
value = float(re.sub(r'\(.*\)', '', line.split()[-1]))
|
|
257
|
+
if line.startswith('_cell_length'):
|
|
258
|
+
self.cell_lengths[cell_params.index(line.split()[0])] = value
|
|
259
|
+
else:
|
|
260
|
+
self.cell_angles[cell_params.index(line.split()[0])%3] = value
|
|
261
|
+
elif line.startswith('_cell_formula_units_Z'):
|
|
262
|
+
self._ref_z_value = int(line.split()[-1])
|
|
263
|
+
|
|
264
|
+
# get index position
|
|
265
|
+
if 'loop_' == line:
|
|
266
|
+
counter = 0
|
|
267
|
+
is_read_atom = False
|
|
268
|
+
is_read_sym = False
|
|
269
|
+
continue
|
|
270
|
+
elif '_' == line[0]:
|
|
271
|
+
if line in atom_data_index.keys():
|
|
272
|
+
atom_data_index[line] = counter
|
|
273
|
+
is_read_atom = True
|
|
274
|
+
is_read_sym = False
|
|
275
|
+
elif line in ('_symmetry_equiv_pos_as_xyz', '_space_group_symop_operation_xyz'):
|
|
276
|
+
symmetry_data_index = counter
|
|
277
|
+
is_read_atom = False
|
|
278
|
+
is_read_sym = True
|
|
279
|
+
else:
|
|
280
|
+
is_read_sym = False
|
|
281
|
+
counter += 1
|
|
282
|
+
continue
|
|
283
|
+
elif ';' == line[0]:
|
|
284
|
+
is_read_atom = False
|
|
285
|
+
is_read_sym = False
|
|
286
|
+
continue
|
|
287
|
+
|
|
288
|
+
if line[0] not in ('_', '#'):
|
|
289
|
+
# get symbol and fractional coordinates
|
|
290
|
+
if is_read_atom:
|
|
291
|
+
tmp_atom_data = line.split()
|
|
292
|
+
# remove disorder
|
|
293
|
+
if '?' not in tmp_atom_data[atom_data_index['_atom_site_label']]:
|
|
294
|
+
if atom_data_index['_atom_site_type_symbol'] is None:
|
|
295
|
+
symbol_label = tmp_atom_data[atom_data_index['_atom_site_label']]
|
|
296
|
+
symbol = symbol_label
|
|
297
|
+
for s in ['A', 'B', 'C']:
|
|
298
|
+
symbol = symbol.replace(s, '')
|
|
299
|
+
symbol = re.sub(r'\d+', '', symbol)
|
|
300
|
+
else:
|
|
301
|
+
symbol_label = tmp_atom_data[atom_data_index['_atom_site_label']]
|
|
302
|
+
symbol = tmp_atom_data[atom_data_index['_atom_site_type_symbol']]
|
|
303
|
+
fract_x = tmp_atom_data[atom_data_index['_atom_site_fract_x']]
|
|
304
|
+
fract_y = tmp_atom_data[atom_data_index['_atom_site_fract_y']]
|
|
305
|
+
fract_z = tmp_atom_data[atom_data_index['_atom_site_fract_z']]
|
|
306
|
+
coord = [float(re.sub(r'\(.*\)', '', x)) for x in [fract_x, fract_y, fract_z]]
|
|
307
|
+
self.symbols.append(symbol)
|
|
308
|
+
self.symbols_label.append(symbol_label)
|
|
309
|
+
self.coordinates.append(coord)
|
|
310
|
+
# get symmetry operation information
|
|
311
|
+
elif is_read_sym:
|
|
312
|
+
if "'" in line:
|
|
313
|
+
line = list(map(lambda x: x.strip().replace(' ', ''), line.split("'")))
|
|
314
|
+
self.symmetry_pos.append(line[symmetry_data_index].lower())
|
|
315
|
+
else:
|
|
316
|
+
line = list(map(lambda x: x.strip().replace(' ', ''), line.split()))
|
|
317
|
+
self.symmetry_pos.append(line[symmetry_data_index].lower())
|
|
318
|
+
|
|
319
|
+
self.symbols = np.array(self.symbols)
|
|
320
|
+
self.coordinates = np.array(self.coordinates)
|
|
321
|
+
|
|
322
|
+
def _search_connect_atoms(self, node: int, atoms: List[int], visited: NDArray[bool], num_atoms: int) -> None:
|
|
323
|
+
"""Find bonded atoms using depth-first search.
|
|
324
|
+
|
|
325
|
+
Parameters
|
|
326
|
+
----------
|
|
327
|
+
node : int
|
|
328
|
+
Index of the atom.
|
|
329
|
+
atoms : List[int]
|
|
330
|
+
List of bonded atoms.
|
|
331
|
+
visited : NDArray[bool]
|
|
332
|
+
Array of visited atoms.
|
|
333
|
+
num_atoms : int
|
|
334
|
+
Number of atoms.
|
|
335
|
+
"""
|
|
336
|
+
visited[node] = True
|
|
337
|
+
atoms.append(node)
|
|
338
|
+
for i in range(num_atoms):
|
|
339
|
+
if self.adjacency_mat[node, i] and not visited[i]:
|
|
340
|
+
self._search_connect_atoms(i, atoms, visited, num_atoms)
|
|
341
|
+
|
|
342
|
+
def _split_mols(self) -> None:
|
|
343
|
+
"""Split molecules."""
|
|
344
|
+
self.bonded_atoms = []
|
|
345
|
+
num_atoms = len(self.sym_symbols)
|
|
346
|
+
visited = np.zeros(num_atoms, dtype=bool)
|
|
347
|
+
|
|
348
|
+
for i in range(num_atoms):
|
|
349
|
+
if not visited[i]:
|
|
350
|
+
atoms = []
|
|
351
|
+
self._search_connect_atoms(i, atoms, visited, num_atoms)
|
|
352
|
+
self.bonded_atoms.append(atoms)
|
|
353
|
+
|
|
354
|
+
# get row corresponding to index
|
|
355
|
+
self.sub_matrices = []
|
|
356
|
+
for index_group in self.bonded_atoms:
|
|
357
|
+
# get row corresponding to index
|
|
358
|
+
index_group.sort()
|
|
359
|
+
sub_matrix = self.adjacency_mat[np.ix_(index_group, index_group)]
|
|
360
|
+
self.sub_matrices.append(sub_matrix)
|
|
361
|
+
|
|
362
|
+
def calc_cen_of_weight(self, coordinates: NDArray[np.float64]) -> NDArray[np.float64]:
    """Calculate the center of a molecule.

    The center is the plain mean of the coordinates along the first axis
    (atomic weights are not used), rounded to 10 decimals.

    Parameters
    ----------
    coordinates : NDArray[np.float64]
        Coordinates of one molecule.

    Returns
    -------
    NDArray[np.float64]
        Center of weight.
    """
    center = np.mean(coordinates, axis=0)
    rounded_center = np.round(center, decimals=10)

    return rounded_center
|
|
378
|
+
|
|
379
|
+
def convert_cart_to_frac(self, cart_coord: NDArray[np.float64]) -> NDArray[np.float64]:
    """Convert Cartesian coordinates to fractional coordinates.

    The conversion matrix is built from ``self.cell_lengths`` and
    ``self.cell_angles`` (converted from degrees to radians).

    Parameters
    ----------
    cart_coord : NDArray[np.float64]
        Cartesian coordinates.

    Returns
    -------
    NDArray[np.float64]
        Fractional coordinates.
    """
    len_a, len_b, len_c = self.cell_lengths
    alpha, beta, gamma = (np.radians(angle) for angle in self.cell_angles)

    cos_alpha = np.cos(alpha)
    cos_beta = np.cos(beta)
    cos_gamma = np.cos(gamma)
    sin_gamma = np.sin(gamma)

    vol = np.sqrt(1 - cos_alpha**2 - cos_beta**2 - cos_gamma**2 + 2*cos_alpha*cos_beta*cos_gamma)

    row_a = (1/len_a, 0, 0)
    row_b = (
        -cos_gamma / (len_a*sin_gamma),
        1 / (len_b*sin_gamma),
        0,
    )
    row_c = (
        (cos_alpha*cos_gamma - cos_beta) / (len_a*vol*sin_gamma),
        (cos_beta*cos_gamma - cos_alpha) / (len_b*vol*sin_gamma),
        sin_gamma / (len_c*vol),
    )
    to_frac = np.array((row_a, row_b, row_c))

    return np.dot(cart_coord, to_frac)
|
|
408
|
+
|
|
409
|
+
def convert_frac_to_cart(self, frac_coord: NDArray[np.float64]) -> NDArray[np.float64]:
    """Convert fractional coordinates to Cartesian coordinates.

    Parameters
    ----------
    frac_coord : NDArray[np.float64]
        Fractional coordinates.

    Returns
    -------
    NDArray[np.float64]
        Cartesian coordinates, i.e. the product of the input and ``self.lattice``.
    """
    cell_matrix = self.lattice
    cart_coord = np.dot(frac_coord, cell_matrix)

    return cart_coord
|
|
423
|
+
|
|
424
|
+
def expand_mols(
    self,
    expand_range: int = 1
) -> Dict[Tuple[int, int, int], Dict[int, List[Tuple[str, NDArray[np.float64]]]]]:
    """Generate molecules around unique molecules.

    Parameters
    ----------
    expand_range : int
        Number of unit cells generated in each direction around the origin
        unit cell, by default 1

    Returns
    -------
    Dict[Tuple[int, int, int], Dict[int, List[Tuple[str, NDArray[np.float64]]]]]
        A nested dictionary containing the expanded molecular structure:

        - Outer key: Tuple[int, int, int]
            Represents the unit cell offset (i, j, k) relative to the origin unit cell.
            For example, (0, 0, 0) is the origin unit cell, (1, 0, 0) is one unit cell away in the a-direction, etc.

        - Inner key: int
            The index of the unique molecule within that unit cell.

        - Value: list of two items
            - The element symbols of the molecule, as stored in ``self.unique_symbols``
            - The coordinates of the molecule from ``self.unique_coords`` with the
              cell offset added. The offset is added without any conversion, so
              the result is in the same (fractional) coordinates as ``self.unique_coords``.
    """
    expand_mols = {}
    offsets = range(-expand_range, expand_range + 1)

    for comb in product(offsets, repeat=3):
        shift = np.array(comb)
        for i, unique_coord in self.unique_coords.items():
            # setdefault creates the inner dictionary for whichever molecule is
            # handled first, so the result does not depend on key 0 coming first
            expand_mols.setdefault(comb, {})[i] = [self.unique_symbols[i], unique_coord + shift]

    return expand_mols
|
|
463
|
+
|
|
464
|
+
def export_unit_cell_file(self, file_path: str, format: Literal['mol', 'xyz'] = 'mol') -> None:
    """Export the unique molecules of the unit cell to a file.

    When the 'mol' format is requested for more than 999 atoms, the file is
    written in xyz format instead and a warning is issued. In that case only a
    trailing '.mol' extension of ``file_path`` is changed to '.xyz'; the rest
    of the path (for example a directory name containing '.mol') is left untouched.

    Parameters
    ----------
    file_path : str
        Path of the file to be saved.
    format : Literal['mol', 'xyz']
        Format of the file to be saved.
    """
    unit_cell_file = FileIO()
    for idx, symbols in self.unique_symbols.items():
        unit_cell_file.add_symbols(symbols)
        unit_cell_file.add_coordinates(self.convert_frac_to_cart(self.unique_coords[idx]))
        # NOTE(review): sub_matrices is ordered like bonded_atoms, while idx counts
        # only the groups accepted by _calc_z_value; the two agree only if no
        # bonded group was skipped there. Confirm.
        unit_cell_file.add_adjacency_mat(self.sub_matrices[idx])

    if format == 'mol' and unit_cell_file.atom_num > 999:
        format = 'xyz'
        if file_path.endswith('.mol'):
            file_path = file_path[:-len('.mol')] + '.xyz'
        warnings.warn('The number of atoms is greater than 999. The file is saved as xyz format.')

    if format == 'mol':
        unit_cell_file.export_mol_file(file_path, header1=self.basename, header2="Generated by cif_reader.py")
    elif format == 'xyz':
        unit_cell_file.export_xyz_file(file_path, comment="Generated by cif_reader.py")
|
|
489
|
+
|
|
490
|
+
def remove_duplicates(
    self,
    symbol: List[str],
    coordinate: NDArray[np.float64],
    tol: float = 1e-4,
) -> Tuple[List[str], NDArray[np.float64]]:
    """Remove atoms whose coordinates duplicate those of an earlier atom.

    An atom is dropped when the squared distance to any atom with a smaller
    index is not larger than ``tol``. Both inputs are filtered with the same
    boolean mask.

    Parameters
    ----------
    symbol : List[str]
        Symbols of molecules.
    coordinate : NDArray[np.float64]
        Coordinates of molecules.
    tol : float
        Tolerance for duplicate detection (compared with the squared distance).

    Returns
    -------
    Tuple[List[str], NDArray[np.float64]]
        Symbols and coordinates of unique molecules.
    """
    offsets = coordinate[np.newaxis, :, :] - coordinate[:, np.newaxis, :]
    squared_dist = (offsets ** 2).sum(axis=-1)

    # the strict lower triangle only compares an atom with the atoms before it
    has_earlier_twin = np.tril(squared_dist <= tol, k=-1).any(axis=-1)
    keep = ~has_earlier_twin

    return symbol[keep], coordinate[keep]
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
class ElementPropertiesIsNotDefinedError(Exception):
    """Raised when the properties of an element are not defined."""
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
class SymmetryIsNotDefinedError(Exception):
    """Raised when no symmetry operation is defined."""
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
class ZValueIsNotMatchError(Exception):
    """Raised when the calculated Z value does not match the reference Z value."""
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
class FileIO:
    """Collect molecules and write them to a MOL (V2000) or XYZ file.

    Molecules are added one at a time with ``add_symbols``, ``add_coordinates``
    and ``add_adjacency_mat``; the three lists are matched by position when a
    file is exported.
    """

    def __init__(self) -> None:
        # total number of atoms added with add_symbols
        self.atom_num = 0
        self.symbols_list = []
        self.coordinates_list = []
        self.adjacency_mat_list = []

    def add_adjacency_mat(self, adjacency_mat: NDArray[bool]) -> None:
        """Add the adjacency matrix of one molecule.

        Parameters
        ----------
        adjacency_mat : NDArray[bool]
            Adjacency matrix.
        """
        self.adjacency_mat_list.append(adjacency_mat)

    def add_coordinates(self, coordinates: NDArray[np.float64]) -> None:
        """Add the coordinates of one molecule.

        Parameters
        ----------
        coordinates : NDArray[np.float64]
            Coordinates.
        """
        self.coordinates_list.append(coordinates)

    def add_symbols(self, symbols: List[str]) -> None:
        """Add the symbols of one molecule and update the atom count.

        Parameters
        ----------
        symbols : List[str]
            Symbols.
        """
        self.atom_num += len(symbols)
        self.symbols_list.append(symbols)

    def export_mol_file(
        self,
        file_path: str,
        header1: str,
        header2: str,
    ) -> None:
        """Export all added molecules to one MOL file.

        The counts line, the atom lines and the bond lines use the fixed-width
        columns of the V2000 connection table (3 characters per integer field).
        The bond count in the counts line is the number of bond lines written.

        Parameters
        ----------
        file_path : str
            Path of the file to be saved.
        header1 : str
            Header line 1.
        header2 : str
            Header line 2.
        """
        atom_lines = []
        bond_lines = []
        total_atoms = 0

        for i, symbols in enumerate(self.symbols_list):
            num_atoms = len(symbols)

            for s, (x, y, z) in zip(symbols, self.coordinates_list[i]):
                # coordinates, symbol (3 columns), mass difference (2 columns)
                # and eleven further 3-column fields that are all 0
                atom_lines.append(f"{x:10.4f}{y:10.4f}{z:10.4f} {s:<3s} 0" + "  0" * 11 + "\n")

            # one bond per pair in the strict lower triangle, numbered over the whole file
            adjacency_mat = np.asarray(self.adjacency_mat_list[i])[:num_atoms, :num_atoms]
            for j, k in np.argwhere(np.tril(adjacency_mat == 1, k=-1)).tolist():
                bond_lines.append(f"{j + total_atoms + 1:3d}{k + total_atoms + 1:3d}  1  0\n")

            total_atoms += num_atoms

        total_bonds = len(bond_lines)

        with open(file_path, 'w') as f:
            f.write(f"{header1}\n")
            f.write(f"{header2}\n")
            f.write("\n")

            f.write(f"{total_atoms:3d}{total_bonds:3d}  0  0  0  0  0  0  0  0999 V2000\n")

            f.writelines(atom_lines)
            f.writelines(bond_lines)

            f.write("M  END\n")
            f.write("$$$$\n")

    def export_xyz_file(
        self,
        file_path: str,
        comment: str,
    ) -> None:
        """Export all added molecules to one XYZ file.

        Parameters
        ----------
        file_path : str
            Path of the file to be saved.
        comment : str
            Comment.
        """
        xyz_file_lines = []
        total_atoms = 0

        for i, symbols in enumerate(self.symbols_list):
            for s, (x, y, z) in zip(symbols, self.coordinates_list[i]):
                xyz_file_lines.append(f"{s:2s} {x:12.6f} {y:12.6f} {z:12.6f}\n")

            total_atoms += len(symbols)

        with open(file_path, 'w') as f:
            f.write(f"{total_atoms:3d}\n")
            f.write(f"{comment}\n")
            f.writelines(xyz_file_lines)
|