surface-construct 0.8.1__tar.gz → 0.8.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {surface_construct-0.8.1/surface_construct.egg-info → surface_construct-0.8.4}/PKG-INFO +11 -5
- {surface_construct-0.8.1 → surface_construct-0.8.4}/setup.py +1 -4
- {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/sampling.py +113 -96
- surface_construct-0.8.4/surface_construct/surface_grid.py +1115 -0
- surface_construct-0.8.4/surface_construct/utils.py +177 -0
- surface_construct-0.8.4/surface_construct/weight_functions.py +65 -0
- {surface_construct-0.8.1 → surface_construct-0.8.4/surface_construct.egg-info}/PKG-INFO +11 -5
- {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct.egg-info/SOURCES.txt +5 -1
- {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct.egg-info/requires.txt +0 -3
- surface_construct-0.8.4/tests/test_sampling1.py +37 -0
- surface_construct-0.8.4/tests/test_sampling2.py +44 -0
- surface_construct-0.8.4/tests/test_surface_grid.py +105 -0
- surface_construct-0.8.1/surface_construct/surface_grid.py +0 -705
- surface_construct-0.8.1/surface_construct/utils.py +0 -329
- {surface_construct-0.8.1 → surface_construct-0.8.4}/LICENSE +0 -0
- {surface_construct-0.8.1 → surface_construct-0.8.4}/README.md +0 -0
- {surface_construct-0.8.1 → surface_construct-0.8.4}/setup.cfg +0 -0
- {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/__init__.py +0 -0
- {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/atoms.py +0 -0
- {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/db.py +0 -0
- {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/default_parameter.py +0 -0
- {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/structure.py +0 -0
- {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/surface.py +0 -0
- {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct.egg-info/dependency_links.txt +0 -0
- {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: surface_construct
|
|
3
|
-
Version: 0.8.1
|
|
3
|
+
Version: 0.8.4
|
|
4
4
|
Summary: Surface termination construction especially for complex model, such as oxides or carbides.
|
|
5
5
|
Home-page: https://gitee.com/pjren/surface_construct/
|
|
6
6
|
Author: ren
|
|
@@ -13,14 +13,20 @@ Description-Content-Type: text/markdown
|
|
|
13
13
|
License-File: LICENSE
|
|
14
14
|
Requires-Dist: ase
|
|
15
15
|
Requires-Dist: networkx
|
|
16
|
-
Requires-Dist: numpy
|
|
17
16
|
Requires-Dist: spglib
|
|
18
17
|
Requires-Dist: pandas
|
|
19
18
|
Requires-Dist: tqdm
|
|
20
|
-
Requires-Dist: matplotlib
|
|
21
|
-
Requires-Dist: scipy
|
|
22
19
|
Requires-Dist: scikit-learn
|
|
23
20
|
Requires-Dist: scikit-image
|
|
21
|
+
Dynamic: author
|
|
22
|
+
Dynamic: author-email
|
|
23
|
+
Dynamic: classifier
|
|
24
|
+
Dynamic: description
|
|
25
|
+
Dynamic: description-content-type
|
|
26
|
+
Dynamic: home-page
|
|
27
|
+
Dynamic: license
|
|
28
|
+
Dynamic: requires-dist
|
|
29
|
+
Dynamic: summary
|
|
24
30
|
|
|
25
31
|
# 基于分层采样策略的催化剂表面位点全局分析
|
|
26
32
|
|
|
@@ -6,19 +6,16 @@ with open("README.md", "r", encoding='utf-8') as f:
|
|
|
6
6
|
install_requires = [
|
|
7
7
|
'ase',
|
|
8
8
|
'networkx',
|
|
9
|
-
'numpy',
|
|
10
9
|
'spglib',
|
|
11
10
|
'pandas',
|
|
12
11
|
'tqdm',
|
|
13
|
-
'matplotlib',
|
|
14
|
-
'scipy',
|
|
15
12
|
'scikit-learn',
|
|
16
13
|
'scikit-image'
|
|
17
14
|
]
|
|
18
15
|
|
|
19
16
|
setup(
|
|
20
17
|
name='surface_construct',
|
|
21
|
-
version='0.8.1',
|
|
18
|
+
version='0.8.4',
|
|
22
19
|
packages=['surface_construct'],
|
|
23
20
|
url='https://gitee.com/pjren/surface_construct/',
|
|
24
21
|
license='GPL',
|
|
@@ -1,17 +1,37 @@
|
|
|
1
1
|
"""
|
|
2
2
|
TODO: 关键点采样:top 位置、hollow位,bridge 位等等。
|
|
3
3
|
"""
|
|
4
|
-
|
|
5
|
-
|
|
4
|
+
import itertools
|
|
6
5
|
import numpy as np
|
|
7
6
|
from ase.geometry import get_distances
|
|
8
|
-
from scipy.spatial import ConvexHull
|
|
7
|
+
from scipy.spatial import ConvexHull, cKDTree
|
|
9
8
|
from scipy.spatial.distance import cdist
|
|
9
|
+
from scipy.special import comb
|
|
10
10
|
from sklearn.cluster import KMeans as Cluster
|
|
11
11
|
import random
|
|
12
12
|
|
|
13
13
|
from surface_construct.utils import furthest_sites
|
|
14
14
|
|
|
15
|
+
MIN_HULL_ANGLE_COS = np.cos(np.pi * 30 / 180)
|
|
16
|
+
|
|
17
|
+
def hull_vertices(hull):
|
|
18
|
+
hsimplices = hull.simplices
|
|
19
|
+
hvertices = hull.vertices
|
|
20
|
+
hnorms = hull.equations[:,0:-1]
|
|
21
|
+
ndim = hsimplices.shape[1]
|
|
22
|
+
vertices = []
|
|
23
|
+
# 去掉 hull 的 simplices 的角度较大的点
|
|
24
|
+
for i in hvertices:
|
|
25
|
+
p0_facets_idx = np.argwhere(hsimplices == i)[:,0]
|
|
26
|
+
p0_norms = hnorms[p0_facets_idx]
|
|
27
|
+
cosangle = lambda a,b: a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))
|
|
28
|
+
# 求 i 凸点相邻的超平面的法向向量之间的夹角。如果存在夹角小于30度,即平面之间的夹角大于150度,则排除该点。反之,保留该点。
|
|
29
|
+
norm_angle_cos = np.absolute([cosangle(a,b) for a,b in itertools.combinations(p0_norms, 2)])
|
|
30
|
+
if np.sum(norm_angle_cos < MIN_HULL_ANGLE_COS) >= comb(ndim,2):
|
|
31
|
+
vertices.append(i)
|
|
32
|
+
|
|
33
|
+
return vertices
|
|
34
|
+
|
|
15
35
|
|
|
16
36
|
def addition_samples(sg_obj, size=None, probability=None, **kwargs):
|
|
17
37
|
if 'seed' in kwargs:
|
|
@@ -51,6 +71,7 @@ def addition_samples(sg_obj, size=None, probability=None, **kwargs):
|
|
|
51
71
|
class SamplingBase:
|
|
52
72
|
def __init__(self, sg_obj, **kwargs):
|
|
53
73
|
self.sg_obj = sg_obj
|
|
74
|
+
self.threshold = kwargs.get('threshold', 0.37) # 0.37 is half of H-H bond
|
|
54
75
|
|
|
55
76
|
@property
|
|
56
77
|
def _pop_size(self):
|
|
@@ -89,21 +110,46 @@ class SamplingBase:
|
|
|
89
110
|
|
|
90
111
|
return point_idx
|
|
91
112
|
|
|
113
|
+
def exclude_too_close_sample(self, idx_list, threshold=None):
|
|
114
|
+
if threshold is None:
|
|
115
|
+
threshold = self.threshold
|
|
116
|
+
if self.sg_obj.sample_idx:
|
|
117
|
+
unique_idx_list = [i for i in idx_list if i not in self.sg_obj.sample_idx]
|
|
118
|
+
points = list(self.sg_obj.sample_points)
|
|
119
|
+
else:
|
|
120
|
+
unique_idx_list = idx_list[:]
|
|
121
|
+
points = []
|
|
122
|
+
new_idx_list = []
|
|
123
|
+
for idx in unique_idx_list:
|
|
124
|
+
p = self.sg_obj.points[idx]
|
|
125
|
+
if len(points) == 0:
|
|
126
|
+
points.append(p)
|
|
127
|
+
new_idx_list.append(idx)
|
|
128
|
+
continue
|
|
129
|
+
tree = cKDTree(points)
|
|
130
|
+
if len(tree.query_ball_point(x=p, r=threshold,p=2))==0:
|
|
131
|
+
points.append(p)
|
|
132
|
+
new_idx_list.append(idx)
|
|
133
|
+
|
|
134
|
+
if len(new_idx_list) != idx_list:
|
|
135
|
+
print(f"Exclude too close sample {set(idx_list)-set(new_idx_list)}")
|
|
136
|
+
return new_idx_list
|
|
92
137
|
|
|
93
138
|
class KeyPointSampling(SamplingBase):
|
|
94
139
|
"""
|
|
95
|
-
|
|
96
|
-
需要一个基础func,从xy坐标,找到对应的格点。
|
|
140
|
+
关键点采样,使用 vip_id
|
|
97
141
|
"""
|
|
98
|
-
def _samples(self,
|
|
99
|
-
|
|
142
|
+
def _samples(self, **kwargs):
|
|
143
|
+
sample_idx = self.sg_obj.unique_vip_id
|
|
144
|
+
clusters = Cluster(n_clusters=len(sample_idx)).fit(self.sg_obj.vector)
|
|
145
|
+
self.sg_obj._clusters = clusters
|
|
146
|
+
return sample_idx
|
|
100
147
|
|
|
101
148
|
|
|
102
149
|
class RandomSampling(SamplingBase):
|
|
103
150
|
"""
|
|
104
151
|
完全随机的选择点,仅用于测试,效率太低。
|
|
105
152
|
"""
|
|
106
|
-
|
|
107
153
|
def __init__(self, sg_obj, **kwargs):
|
|
108
154
|
super().__init__(sg_obj, **kwargs)
|
|
109
155
|
if 'seed' in kwargs:
|
|
@@ -121,7 +167,6 @@ class MaxSigmaSampling(SamplingBase):
|
|
|
121
167
|
"""
|
|
122
168
|
对最大误差的点进行采样
|
|
123
169
|
"""
|
|
124
|
-
|
|
125
170
|
def _samples(self, size, **kwargs):
|
|
126
171
|
if 'energy' in self.sg_obj.grid_property:
|
|
127
172
|
# 如果已经读入了一些能量,则返回误差最大的点
|
|
@@ -133,59 +178,32 @@ class MaxSigmaSampling(SamplingBase):
|
|
|
133
178
|
|
|
134
179
|
class InitialSampling(SamplingBase):
|
|
135
180
|
"""
|
|
136
|
-
|
|
181
|
+
结合使用 KeyPointSampling 和 MaxDiversitySampling
|
|
137
182
|
"""
|
|
138
183
|
|
|
139
184
|
def _samples(self, size, **kwargs):
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
#
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
mesh_centers = cluster0.cluster_centers_
|
|
159
|
-
self.sg_obj._mesh_centers = mesh_centers
|
|
160
|
-
cluster = Cluster(n_clusters=size)
|
|
161
|
-
cluster.fit(mesh_centers)
|
|
162
|
-
self.sg_obj._clusters = cluster
|
|
163
|
-
nvert = len(vertices)
|
|
164
|
-
if nvert >= size:
|
|
165
|
-
warning("Sample number better be larger than {nvert}!")
|
|
166
|
-
if size == 1:
|
|
167
|
-
sample_idx = np.random.choice(vertices,1)
|
|
168
|
-
else:
|
|
169
|
-
sample_idx = [vertices[i] for i in
|
|
170
|
-
furthest_sites(self.sg_obj.vector[vertices], size)]
|
|
171
|
-
else:
|
|
172
|
-
# 聚类
|
|
173
|
-
cluster2 = Cluster(n_clusters=size-nvert)
|
|
174
|
-
cluster2.fit(mesh_centers)
|
|
175
|
-
center_dist = cdist(cluster2.cluster_centers_, self.sg_obj.vector) # 计算每个点到中心的距离
|
|
176
|
-
sample_idx = vertices + np.argmin(center_dist, axis=-1).tolist()
|
|
185
|
+
vip_idx = self.sg_obj.unique_vip_id
|
|
186
|
+
clusters = Cluster(n_clusters=len(vip_idx)).fit(self.sg_obj.vector)
|
|
187
|
+
self.sg_obj._clusters = clusters
|
|
188
|
+
# 如果 size 小于 vip,则从中随机选取部分
|
|
189
|
+
if size == len(vip_idx):
|
|
190
|
+
sample_idx = vip_idx
|
|
191
|
+
self._append_sample_to_sg(point_idx=sample_idx)
|
|
192
|
+
elif size < len(vip_idx):
|
|
193
|
+
print("Warning: The initial sampling size is smaller than the number of key points")
|
|
194
|
+
rng = np.random.default_rng()
|
|
195
|
+
comb_vip = list(itertools.combinations(vip_idx, size))
|
|
196
|
+
sample_idx = rng.choice(comb_vip)
|
|
197
|
+
self._append_sample_to_sg(point_idx=sample_idx)
|
|
198
|
+
else: # 如果 size 大于 vip,则需要 MaxDiversitySampling 新增一些点
|
|
199
|
+
self._append_sample_to_sg(point_idx=vip_idx) # 先增加进去vip 点作为已经采样的点,再进行最大多样性采样
|
|
200
|
+
adding_sample = MaxDiversitySampling(self.sg_obj).samples(size=size-len(vip_idx), **kwargs)
|
|
201
|
+
self._append_sample_to_sg(point_idx=adding_sample)
|
|
202
|
+
sample_idx = np.concatenate([vip_idx, adding_sample])
|
|
177
203
|
return sample_idx
|
|
178
204
|
|
|
179
|
-
def
|
|
180
|
-
|
|
181
|
-
将采样点加入到 sg_obj.sample_points 和相应的 vector
|
|
182
|
-
:return:
|
|
183
|
-
"""
|
|
184
|
-
if point_idx is not None:
|
|
185
|
-
self.sg_obj.sample_idx = np.asarray(point_idx)
|
|
186
|
-
self.sg_obj._sample_vector = self.sg_obj.vector[point_idx]
|
|
187
|
-
self.sg_obj.sample_points = self.sg_obj.points[point_idx]
|
|
188
|
-
|
|
205
|
+
def samples(self, size=1, **kwargs):
|
|
206
|
+
return self._samples(size=size, **kwargs)
|
|
189
207
|
|
|
190
208
|
class MaxDiversitySampling(SamplingBase):
|
|
191
209
|
"""
|
|
@@ -195,8 +213,7 @@ class MaxDiversitySampling(SamplingBase):
|
|
|
195
213
|
* 判断已经采样点属于的类别,找出没有点的类别,空类
|
|
196
214
|
* 如果空类不止一个,比较这些空类中心与旧点的距离,选择距离最大的点。
|
|
197
215
|
"""
|
|
198
|
-
|
|
199
|
-
def _samples(self, size, center=False, **kwargs):
|
|
216
|
+
def _samples(self, size, center=True, **kwargs):
|
|
200
217
|
"""
|
|
201
218
|
|
|
202
219
|
:param size:
|
|
@@ -206,37 +223,43 @@ class MaxDiversitySampling(SamplingBase):
|
|
|
206
223
|
"""
|
|
207
224
|
# 判断是否有过往的采样点,如果没有,调用 InitialSampling
|
|
208
225
|
if self.sg_obj.sample_idx is None:
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
#
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
226
|
+
clusters = Cluster(n_clusters=size).fit(self.sg_obj.vector)
|
|
227
|
+
virgin = list(set(clusters.labels_))
|
|
228
|
+
else:
|
|
229
|
+
cluster_size = len(self.sg_obj.sample_idx) + size
|
|
230
|
+
nvirgin = 0
|
|
231
|
+
larger_clusters = None
|
|
232
|
+
larger_virgin = None
|
|
233
|
+
virgin = None
|
|
234
|
+
clusters = None
|
|
235
|
+
# 如果等于则停止,并保存 cluster
|
|
236
|
+
while nvirgin != size:
|
|
237
|
+
# 以 len(sample_idx) + size 作为新的聚类的size
|
|
238
|
+
clusters = Cluster(n_clusters=cluster_size).fit(self.sg_obj.vector)
|
|
239
|
+
labels = clusters.labels_[self.sg_obj.sample_idx]
|
|
240
|
+
labels_set = set(labels)
|
|
241
|
+
virgin = set(range(cluster_size)) - labels_set
|
|
242
|
+
nvirgin = len(virgin)
|
|
243
|
+
# 判断分类以后空类数目与size的大小
|
|
244
|
+
# 如果大于size,则减小size,并记录空类的数目
|
|
245
|
+
if nvirgin > size:
|
|
246
|
+
cluster_size -= 1
|
|
247
|
+
larger_clusters = clusters
|
|
248
|
+
larger_virgin = virgin
|
|
249
|
+
# 如果小于 size 则增大size,检查上一个size是否有记录,如果有记录则使用上个size 的记录。从中随机选择size个点作为采样点。
|
|
250
|
+
elif nvirgin < size:
|
|
251
|
+
cluster_size += 1
|
|
252
|
+
if larger_clusters is not None:
|
|
253
|
+
clusters = larger_clusters
|
|
254
|
+
virgin = larger_virgin
|
|
255
|
+
break
|
|
237
256
|
# 从 virgin 里面选取 size 个点
|
|
238
257
|
rng = np.random.default_rng()
|
|
239
|
-
|
|
258
|
+
comb_vip = list(itertools.combinations(list(virgin), size))
|
|
259
|
+
cluster_idx = rng.choice(comb_vip)
|
|
260
|
+
if (not center) and 'energy' not in self.sg_obj.grid_property:
|
|
261
|
+
center = True
|
|
262
|
+
print("Warning: Can't get cluster minimum energy, use cluster center instead!")
|
|
240
263
|
if center:
|
|
241
264
|
# 取中心位置的格点
|
|
242
265
|
centers = clusters.cluster_centers_[cluster_idx]
|
|
@@ -248,11 +271,10 @@ class MaxDiversitySampling(SamplingBase):
|
|
|
248
271
|
for c_id in cluster_idx:
|
|
249
272
|
p_idx = np.arange(len(self.sg_obj.points))[clusters.labels_ == c_id]
|
|
250
273
|
# 求这些点的能量最小值
|
|
251
|
-
if 'energy' not in self.sg_obj.grid_property:
|
|
252
|
-
raise NotImplementedError
|
|
253
274
|
p_energy = self.sg_obj.grid_energy[p_idx]
|
|
254
275
|
point_idx.append(p_idx[p_energy.argmin()])
|
|
255
|
-
|
|
276
|
+
# assign cluster to sg_obj
|
|
277
|
+
self.sg_obj._clusters = clusters
|
|
256
278
|
return point_idx
|
|
257
279
|
|
|
258
280
|
|
|
@@ -269,10 +291,6 @@ class RandomWalk(SamplingBase):
|
|
|
269
291
|
"""
|
|
270
292
|
从给定点出发随机行走进行采样
|
|
271
293
|
"""
|
|
272
|
-
|
|
273
|
-
def __init__(self, sg_obj=None, probability=1.0, **kwargs):
|
|
274
|
-
super().__init__(sg_obj, probability, **kwargs)
|
|
275
|
-
|
|
276
294
|
def _samples(self, size, **kwargs):
|
|
277
295
|
raise NotImplementedError
|
|
278
296
|
|
|
@@ -281,7 +299,6 @@ class SystematicSampling(SamplingBase):
|
|
|
281
299
|
"""
|
|
282
300
|
等距采样。主要用于测试。
|
|
283
301
|
"""
|
|
284
|
-
|
|
285
302
|
def _samples(self, size, **kwargs):
|
|
286
303
|
if 'start' in kwargs:
|
|
287
304
|
start = kwargs['start']
|