surface-construct 0.8.1__tar.gz → 0.8.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {surface_construct-0.8.1/surface_construct.egg-info → surface_construct-0.8.4}/PKG-INFO +11 -5
  2. {surface_construct-0.8.1 → surface_construct-0.8.4}/setup.py +1 -4
  3. {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/sampling.py +113 -96
  4. surface_construct-0.8.4/surface_construct/surface_grid.py +1115 -0
  5. surface_construct-0.8.4/surface_construct/utils.py +177 -0
  6. surface_construct-0.8.4/surface_construct/weight_functions.py +65 -0
  7. {surface_construct-0.8.1 → surface_construct-0.8.4/surface_construct.egg-info}/PKG-INFO +11 -5
  8. {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct.egg-info/SOURCES.txt +5 -1
  9. {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct.egg-info/requires.txt +0 -3
  10. surface_construct-0.8.4/tests/test_sampling1.py +37 -0
  11. surface_construct-0.8.4/tests/test_sampling2.py +44 -0
  12. surface_construct-0.8.4/tests/test_surface_grid.py +105 -0
  13. surface_construct-0.8.1/surface_construct/surface_grid.py +0 -705
  14. surface_construct-0.8.1/surface_construct/utils.py +0 -329
  15. {surface_construct-0.8.1 → surface_construct-0.8.4}/LICENSE +0 -0
  16. {surface_construct-0.8.1 → surface_construct-0.8.4}/README.md +0 -0
  17. {surface_construct-0.8.1 → surface_construct-0.8.4}/setup.cfg +0 -0
  18. {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/__init__.py +0 -0
  19. {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/atoms.py +0 -0
  20. {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/db.py +0 -0
  21. {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/default_parameter.py +0 -0
  22. {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/structure.py +0 -0
  23. {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct/surface.py +0 -0
  24. {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct.egg-info/dependency_links.txt +0 -0
  25. {surface_construct-0.8.1 → surface_construct-0.8.4}/surface_construct.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: surface_construct
3
- Version: 0.8.1
3
+ Version: 0.8.4
4
4
  Summary: Surface termination construction especially for complex model, such as oxides or carbides.
5
5
  Home-page: https://gitee.com/pjren/surface_construct/
6
6
  Author: ren
@@ -13,14 +13,20 @@ Description-Content-Type: text/markdown
13
13
  License-File: LICENSE
14
14
  Requires-Dist: ase
15
15
  Requires-Dist: networkx
16
- Requires-Dist: numpy
17
16
  Requires-Dist: spglib
18
17
  Requires-Dist: pandas
19
18
  Requires-Dist: tqdm
20
- Requires-Dist: matplotlib
21
- Requires-Dist: scipy
22
19
  Requires-Dist: scikit-learn
23
20
  Requires-Dist: scikit-image
21
+ Dynamic: author
22
+ Dynamic: author-email
23
+ Dynamic: classifier
24
+ Dynamic: description
25
+ Dynamic: description-content-type
26
+ Dynamic: home-page
27
+ Dynamic: license
28
+ Dynamic: requires-dist
29
+ Dynamic: summary
24
30
 
25
31
  # 基于分层采样策略的催化剂表面位点全局分析
26
32
 
@@ -6,19 +6,16 @@ with open("README.md", "r", encoding='utf-8') as f:
6
6
  install_requires = [
7
7
  'ase',
8
8
  'networkx',
9
- 'numpy',
10
9
  'spglib',
11
10
  'pandas',
12
11
  'tqdm',
13
- 'matplotlib',
14
- 'scipy',
15
12
  'scikit-learn',
16
13
  'scikit-image'
17
14
  ]
18
15
 
19
16
  setup(
20
17
  name='surface_construct',
21
- version='0.8.1',
18
+ version='0.8.4',
22
19
  packages=['surface_construct'],
23
20
  url='https://gitee.com/pjren/surface_construct/',
24
21
  license='GPL',
@@ -1,17 +1,37 @@
1
1
  """
2
2
  TODO: 关键点采样:top 位置、hollow位,bridge 位等等。
3
3
  """
4
- from logging import warning
5
-
4
+ import itertools
6
5
  import numpy as np
7
6
  from ase.geometry import get_distances
8
- from scipy.spatial import ConvexHull
7
+ from scipy.spatial import ConvexHull, cKDTree
9
8
  from scipy.spatial.distance import cdist
9
+ from scipy.special import comb
10
10
  from sklearn.cluster import KMeans as Cluster
11
11
  import random
12
12
 
13
13
  from surface_construct.utils import furthest_sites
14
14
 
15
+ MIN_HULL_ANGLE_COS = np.cos(np.pi * 30 / 180)
16
+
17
+ def hull_vertices(hull):
18
+ hsimplices = hull.simplices
19
+ hvertices = hull.vertices
20
+ hnorms = hull.equations[:,0:-1]
21
+ ndim = hsimplices.shape[1]
22
+ vertices = []
23
+ # 去掉 hull 的 simplices 的角度较大的点
24
+ for i in hvertices:
25
+ p0_facets_idx = np.argwhere(hsimplices == i)[:,0]
26
+ p0_norms = hnorms[p0_facets_idx]
27
+ cosangle = lambda a,b: a.dot(b) / (np.linalg.norm(a) * np.linalg.norm(b))
28
+ # 求 i 凸点相邻的超平面的法向向量之间的夹角。如果存在夹角小于30度,即平面之间的夹角大于150度,则排除该点。反之,保留该点。
29
+ norm_angle_cos = np.absolute([cosangle(a,b) for a,b in itertools.combinations(p0_norms, 2)])
30
+ if np.sum(norm_angle_cos < MIN_HULL_ANGLE_COS) >= comb(ndim,2):
31
+ vertices.append(i)
32
+
33
+ return vertices
34
+
15
35
 
16
36
  def addition_samples(sg_obj, size=None, probability=None, **kwargs):
17
37
  if 'seed' in kwargs:
@@ -51,6 +71,7 @@ def addition_samples(sg_obj, size=None, probability=None, **kwargs):
51
71
  class SamplingBase:
52
72
  def __init__(self, sg_obj, **kwargs):
53
73
  self.sg_obj = sg_obj
74
+ self.threshold = kwargs.get('threshold', 0.37) # 0.37 is half of H-H bond
54
75
 
55
76
  @property
56
77
  def _pop_size(self):
@@ -89,21 +110,46 @@ class SamplingBase:
89
110
 
90
111
  return point_idx
91
112
 
113
+ def exclude_too_close_sample(self, idx_list, threshold=None):
114
+ if threshold is None:
115
+ threshold = self.threshold
116
+ if self.sg_obj.sample_idx:
117
+ unique_idx_list = [i for i in idx_list if i not in self.sg_obj.sample_idx]
118
+ points = list(self.sg_obj.sample_points)
119
+ else:
120
+ unique_idx_list = idx_list[:]
121
+ points = []
122
+ new_idx_list = []
123
+ for idx in unique_idx_list:
124
+ p = self.sg_obj.points[idx]
125
+ if len(points) == 0:
126
+ points.append(p)
127
+ new_idx_list.append(idx)
128
+ continue
129
+ tree = cKDTree(points)
130
+ if len(tree.query_ball_point(x=p, r=threshold,p=2))==0:
131
+ points.append(p)
132
+ new_idx_list.append(idx)
133
+
134
+ if len(new_idx_list) != idx_list:
135
+ print(f"Exclude too close sample {set(idx_list)-set(new_idx_list)}")
136
+ return new_idx_list
92
137
 
93
138
  class KeyPointSampling(SamplingBase):
94
139
  """
95
- 基本思路:先定位 hollow位,再根据图论分析定位 bridge 位,最后定位top位。第二步,筛选等价位点。
96
- 需要一个基础func,从xy坐标,找到对应的格点。
140
+ 关键点采样,使用 vip_id
97
141
  """
98
- def _samples(self, size, **kwargs):
99
- pass
142
+ def _samples(self, **kwargs):
143
+ sample_idx = self.sg_obj.unique_vip_id
144
+ clusters = Cluster(n_clusters=len(sample_idx)).fit(self.sg_obj.vector)
145
+ self.sg_obj._clusters = clusters
146
+ return sample_idx
100
147
 
101
148
 
102
149
  class RandomSampling(SamplingBase):
103
150
  """
104
151
  完全随机的选择点,仅用于测试,效率太低。
105
152
  """
106
-
107
153
  def __init__(self, sg_obj, **kwargs):
108
154
  super().__init__(sg_obj, **kwargs)
109
155
  if 'seed' in kwargs:
@@ -121,7 +167,6 @@ class MaxSigmaSampling(SamplingBase):
121
167
  """
122
168
  对最大误差的点进行采样
123
169
  """
124
-
125
170
  def _samples(self, size, **kwargs):
126
171
  if 'energy' in self.sg_obj.grid_property:
127
172
  # 如果已经读入了一些能量,则返回误差最大的点
@@ -133,59 +178,32 @@ class MaxSigmaSampling(SamplingBase):
133
178
 
134
179
  class InitialSampling(SamplingBase):
135
180
  """
136
- 使用聚类-分层采样进行初始采样
181
+ 结合使用 KeyPointSampling 和 MaxDiversitySampling
137
182
  """
138
183
 
139
184
  def _samples(self, size, **kwargs):
140
- hull = ConvexHull(self.sg_obj.vector)
141
- vertices = []
142
- # 去掉 hull 的 simplices 的角度较大的点
143
- for i in hull.vertices:
144
- p1_idx, p2_idx = np.argwhere(hull.simplices == i)
145
- p0 = hull.points[i]
146
- p1 = hull.points[hull.simplices[p1_idx[0],1-p1_idx[1]]]
147
- p2 = hull.points[hull.simplices[p2_idx[0],1-p2_idx[1]]]
148
- a = p1 - p0
149
- b = p2 - p0
150
- cosangle = a.dot(b)/(np.linalg.norm(a) * np.linalg.norm(b))
151
- if cosangle > np.cos(np.pi*150/180):
152
- vertices.append(i)
153
- # 聚类,vector_mesh
154
- n_vector_mesh = int(hull.volume / (self.sg_obj._vector_unit *
155
- self.sg_obj.interval)**self.sg_obj.vector.shape[1]) + 1
156
- cluster0 = Cluster(n_clusters=n_vector_mesh)
157
- cluster0.fit(self.sg_obj.vector)
158
- mesh_centers = cluster0.cluster_centers_
159
- self.sg_obj._mesh_centers = mesh_centers
160
- cluster = Cluster(n_clusters=size)
161
- cluster.fit(mesh_centers)
162
- self.sg_obj._clusters = cluster
163
- nvert = len(vertices)
164
- if nvert >= size:
165
- warning("Sample number better be larger than {nvert}!")
166
- if size == 1:
167
- sample_idx = np.random.choice(vertices,1)
168
- else:
169
- sample_idx = [vertices[i] for i in
170
- furthest_sites(self.sg_obj.vector[vertices], size)]
171
- else:
172
- # 聚类
173
- cluster2 = Cluster(n_clusters=size-nvert)
174
- cluster2.fit(mesh_centers)
175
- center_dist = cdist(cluster2.cluster_centers_, self.sg_obj.vector) # 计算每个点到中心的距离
176
- sample_idx = vertices + np.argmin(center_dist, axis=-1).tolist()
185
+ vip_idx = self.sg_obj.unique_vip_id
186
+ clusters = Cluster(n_clusters=len(vip_idx)).fit(self.sg_obj.vector)
187
+ self.sg_obj._clusters = clusters
188
+ # 如果 size 小于 vip,则从中随机选取部分
189
+ if size == len(vip_idx):
190
+ sample_idx = vip_idx
191
+ self._append_sample_to_sg(point_idx=sample_idx)
192
+ elif size < len(vip_idx):
193
+ print("Warning: The initial sampling size is smaller than the number of key points")
194
+ rng = np.random.default_rng()
195
+ comb_vip = list(itertools.combinations(vip_idx, size))
196
+ sample_idx = rng.choice(comb_vip)
197
+ self._append_sample_to_sg(point_idx=sample_idx)
198
+ else: # 如果 size 大于 vip,则需要 MaxDiversitySampling 新增一些点
199
+ self._append_sample_to_sg(point_idx=vip_idx) # 先增加进去vip 点作为已经采样的点,再进行最大多样性采样
200
+ adding_sample = MaxDiversitySampling(self.sg_obj).samples(size=size-len(vip_idx), **kwargs)
201
+ self._append_sample_to_sg(point_idx=adding_sample)
202
+ sample_idx = np.concatenate([vip_idx, adding_sample])
177
203
  return sample_idx
178
204
 
179
- def _append_sample_to_sg(self, point_idx=None):
180
- """
181
- 将采样点加入到 sg_obj.sample_points 和相应的 vector
182
- :return:
183
- """
184
- if point_idx is not None:
185
- self.sg_obj.sample_idx = np.asarray(point_idx)
186
- self.sg_obj._sample_vector = self.sg_obj.vector[point_idx]
187
- self.sg_obj.sample_points = self.sg_obj.points[point_idx]
188
-
205
+ def samples(self, size=1, **kwargs):
206
+ return self._samples(size=size, **kwargs)
189
207
 
190
208
  class MaxDiversitySampling(SamplingBase):
191
209
  """
@@ -195,8 +213,7 @@ class MaxDiversitySampling(SamplingBase):
195
213
  * 判断已经采样点属于的类别,找出没有点的类别,空类
196
214
  * 如果空类不止一个,比较这些空类中心与旧点的距离,选择距离最大的点。
197
215
  """
198
-
199
- def _samples(self, size, center=False, **kwargs):
216
+ def _samples(self, size, center=True, **kwargs):
200
217
  """
201
218
 
202
219
  :param size:
@@ -206,37 +223,43 @@ class MaxDiversitySampling(SamplingBase):
206
223
  """
207
224
  # 判断是否有过往的采样点,如果没有,调用 InitialSampling
208
225
  if self.sg_obj.sample_idx is None:
209
- raise "Please add initial samples (e.g. InitialSampling) before invoke this method!"
210
- cluster_size = len(self.sg_obj.sample_idx) + size
211
- nvirgin = 0
212
- larger_clusters = None
213
- larger_virgin = None
214
- virgin = None
215
- clusters = None
216
- # 如果等于则停止,并保存 cluster
217
- while nvirgin != size:
218
- # len(sample_idx) + size 作为新的聚类的size
219
- clusters = Cluster(n_clusters=cluster_size).fit(self.sg_obj.vector)
220
- labels = clusters.labels_[self.sg_obj.sample_idx]
221
- labels_set = set(labels)
222
- virgin = set(range(cluster_size)) - labels_set
223
- nvirgin = len(virgin)
224
- # 判断分类以后空类数目与size的大小
225
- # 如果大于size,则减小size,并记录空类的数目
226
- if nvirgin > size:
227
- cluster_size -= 1
228
- larger_clusters = clusters
229
- larger_virgin = virgin
230
- # 如果小于 size 则增大size,检查上一个size是否有记录,如果有记录则使用上个size 的记录。从中随机选择size个点作为采样点。
231
- elif nvirgin < size:
232
- cluster_size += 1
233
- if larger_clusters is not None:
234
- clusters = larger_clusters
235
- virgin = larger_virgin
236
- break
226
+ clusters = Cluster(n_clusters=size).fit(self.sg_obj.vector)
227
+ virgin = list(set(clusters.labels_))
228
+ else:
229
+ cluster_size = len(self.sg_obj.sample_idx) + size
230
+ nvirgin = 0
231
+ larger_clusters = None
232
+ larger_virgin = None
233
+ virgin = None
234
+ clusters = None
235
+ # 如果等于则停止,并保存 cluster
236
+ while nvirgin != size:
237
+ # len(sample_idx) + size 作为新的聚类的size
238
+ clusters = Cluster(n_clusters=cluster_size).fit(self.sg_obj.vector)
239
+ labels = clusters.labels_[self.sg_obj.sample_idx]
240
+ labels_set = set(labels)
241
+ virgin = set(range(cluster_size)) - labels_set
242
+ nvirgin = len(virgin)
243
+ # 判断分类以后空类数目与size的大小
244
+ # 如果大于size,则减小size,并记录空类的数目
245
+ if nvirgin > size:
246
+ cluster_size -= 1
247
+ larger_clusters = clusters
248
+ larger_virgin = virgin
249
+ # 如果小于 size 则增大size,检查上一个size是否有记录,如果有记录则使用上个size 的记录。从中随机选择size个点作为采样点。
250
+ elif nvirgin < size:
251
+ cluster_size += 1
252
+ if larger_clusters is not None:
253
+ clusters = larger_clusters
254
+ virgin = larger_virgin
255
+ break
237
256
  # 从 virgin 里面选取 size 个点
238
257
  rng = np.random.default_rng()
239
- cluster_idx = rng.choice(list(virgin), size=size)
258
+ comb_vip = list(itertools.combinations(list(virgin), size))
259
+ cluster_idx = rng.choice(comb_vip)
260
+ if (not center) and 'energy' not in self.sg_obj.grid_property:
261
+ center = True
262
+ print("Warning: Can't get cluster minimum energy, use cluster center instead!")
240
263
  if center:
241
264
  # 取中心位置的格点
242
265
  centers = clusters.cluster_centers_[cluster_idx]
@@ -248,11 +271,10 @@ class MaxDiversitySampling(SamplingBase):
248
271
  for c_id in cluster_idx:
249
272
  p_idx = np.arange(len(self.sg_obj.points))[clusters.labels_ == c_id]
250
273
  # 求这些点的能量最小值
251
- if 'energy' not in self.sg_obj.grid_property:
252
- raise NotImplementedError
253
274
  p_energy = self.sg_obj.grid_energy[p_idx]
254
275
  point_idx.append(p_idx[p_energy.argmin()])
255
-
276
+ # assign cluster to sg_obj
277
+ self.sg_obj._clusters = clusters
256
278
  return point_idx
257
279
 
258
280
 
@@ -269,10 +291,6 @@ class RandomWalk(SamplingBase):
269
291
  """
270
292
  从给定点出发随机行走进行采样
271
293
  """
272
-
273
- def __init__(self, sg_obj=None, probability=1.0, **kwargs):
274
- super().__init__(sg_obj, probability, **kwargs)
275
-
276
294
  def _samples(self, size, **kwargs):
277
295
  raise NotImplementedError
278
296
 
@@ -281,7 +299,6 @@ class SystematicSampling(SamplingBase):
281
299
  """
282
300
  等距采样。主要用于测试。
283
301
  """
284
-
285
302
  def _samples(self, size, **kwargs):
286
303
  if 'start' in kwargs:
287
304
  start = kwargs['start']