westpa 2022.11__cp311-cp311-macosx_11_0_arm64.whl → 2022.12__cp311-cp311-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of westpa might be problematic. Click here for more details.
- westpa/_version.py +3 -3
- westpa/cli/tools/w_assign.py +4 -4
- westpa/cli/tools/w_fluxanl.py +1 -3
- westpa/cli/tools/w_ntop.py +2 -2
- westpa/cli/tools/w_red.py +7 -2
- westpa/core/binning/_assign.cpython-311-darwin.so +0 -0
- westpa/core/binning/assign.py +11 -5
- westpa/core/binning/mab.py +352 -273
- westpa/core/data_manager.py +3 -3
- westpa/core/h5io.py +2 -2
- westpa/core/kinetics/_kinetics.cpython-311-darwin.so +0 -0
- westpa/core/kinetics/matrates.py +1 -1
- westpa/core/propagators/executable.py +10 -6
- westpa/core/reweight/_reweight.cpython-311-darwin.so +0 -0
- westpa/core/sim_manager.py +9 -4
- westpa/core/states.py +7 -7
- westpa/core/we_driver.py +4 -2
- westpa/fasthist/_fasthist.cpython-311-darwin.so +0 -0
- westpa/mclib/__init__.py +10 -3
- westpa/mclib/_mclib.cpython-311-darwin.so +0 -0
- westpa/oldtools/aframe/mcbs.py +9 -2
- westpa/oldtools/cmds/w_ttimes.py +4 -1
- westpa/oldtools/stats/edfs.py +1 -1
- westpa/oldtools/stats/mcbs.py +9 -2
- westpa/trajtree/_trajtree.cpython-311-darwin.so +0 -0
- westpa/westext/stringmethod/string_method.py +1 -1
- westpa/westext/weed/ProbAdjustEquil.py +2 -2
- westpa/westext/weed/weed_driver.py +10 -0
- westpa/westext/wess/wess_driver.py +10 -0
- {westpa-2022.11.dist-info → westpa-2022.12.dist-info}/AUTHORS +8 -8
- {westpa-2022.11.dist-info → westpa-2022.12.dist-info}/METADATA +31 -21
- {westpa-2022.11.dist-info → westpa-2022.12.dist-info}/RECORD +36 -37
- {westpa-2022.11.dist-info → westpa-2022.12.dist-info}/WHEEL +2 -1
- westpa/fasthist/__main__.py +0 -110
- {westpa-2022.11.dist-info → westpa-2022.12.dist-info}/LICENSE +0 -0
- {westpa-2022.11.dist-info → westpa-2022.12.dist-info}/entry_points.txt +0 -0
- {westpa-2022.11.dist-info → westpa-2022.12.dist-info}/top_level.txt +0 -0
westpa/core/binning/mab.py
CHANGED
|
@@ -1,73 +1,67 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from typing import List, Optional
|
|
2
3
|
import numpy as np
|
|
3
4
|
import westpa
|
|
4
5
|
from westpa.core.binning import FuncBinMapper
|
|
5
6
|
from os.path import expandvars
|
|
6
7
|
|
|
7
|
-
|
|
8
8
|
log = logging.getLogger(__name__)
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class MABBinMapper(FuncBinMapper):
|
|
12
12
|
"""
|
|
13
|
-
Adaptively place bins
|
|
14
|
-
the progress
|
|
13
|
+
Adaptively place bins between minimum and maximum segments along
|
|
14
|
+
the progress coordinate. Extrema and bottleneck segments are assigned
|
|
15
15
|
to their own bins.
|
|
16
|
-
|
|
17
16
|
"""
|
|
18
17
|
|
|
19
18
|
def __init__(
|
|
20
19
|
self,
|
|
21
|
-
nbins,
|
|
22
|
-
direction=None,
|
|
23
|
-
skip=None,
|
|
24
|
-
bottleneck=True,
|
|
25
|
-
pca=False,
|
|
26
|
-
mab_log=False,
|
|
27
|
-
bin_log=False,
|
|
28
|
-
bin_log_path="$WEST_SIM_ROOT/binbounds.log",
|
|
20
|
+
nbins: List[int],
|
|
21
|
+
direction: Optional[List[int]] = None,
|
|
22
|
+
skip: Optional[List[int]] = None,
|
|
23
|
+
bottleneck: bool = True,
|
|
24
|
+
pca: bool = False,
|
|
25
|
+
mab_log: bool = False,
|
|
26
|
+
bin_log: bool = False,
|
|
27
|
+
bin_log_path: str = "$WEST_SIM_ROOT/binbounds.log",
|
|
29
28
|
):
|
|
30
29
|
"""
|
|
31
30
|
Parameters
|
|
32
31
|
----------
|
|
33
32
|
nbins : list of int
|
|
34
|
-
List of
|
|
35
|
-
direction :
|
|
36
|
-
List of
|
|
37
|
-
Direction options are as follows:
|
|
33
|
+
List of number of bins in each dimension.
|
|
34
|
+
direction : Optional[list of int], default: None
|
|
35
|
+
List of directions in each dimension. Direction options:
|
|
38
36
|
0 : default split at leading and lagging boundaries
|
|
39
37
|
1 : split at leading boundary only
|
|
40
38
|
-1 : split at lagging boundary only
|
|
41
|
-
86 : no splitting at either leading or lagging boundary
|
|
42
|
-
skip :
|
|
43
|
-
List of
|
|
44
|
-
Set to 1 to 'skip' running mab in a dimension.
|
|
39
|
+
86 : no splitting at either leading or lagging boundary (both bottlenecks included)
|
|
40
|
+
skip : Optional[list of int], default: None
|
|
41
|
+
List of skip flags for each dimension. Default None (no skipping).
|
|
45
42
|
bottleneck : bool, default: True
|
|
46
|
-
Whether to
|
|
43
|
+
Whether to enable bottleneck walker splitting.
|
|
47
44
|
pca : bool, default: False
|
|
48
|
-
|
|
45
|
+
Whether to perform PCA on progress coordinates before bin assignment.
|
|
49
46
|
mab_log : bool, default: False
|
|
50
|
-
Whether to output
|
|
47
|
+
Whether to output MAB info to west.log.
|
|
51
48
|
bin_log : bool, default: False
|
|
52
|
-
Whether to output
|
|
49
|
+
Whether to output MAB bin boundaries to a log file.
|
|
53
50
|
bin_log_path : str, default: "$WEST_SIM_ROOT/binbounds.log"
|
|
54
51
|
Path to output bin boundaries.
|
|
55
|
-
|
|
56
52
|
"""
|
|
57
53
|
# Verifying parameters
|
|
58
54
|
if nbins is None:
|
|
59
|
-
raise ValueError("
|
|
55
|
+
raise ValueError("nbins is missing")
|
|
60
56
|
ndim = len(nbins)
|
|
61
57
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
elif len(direction) != ndim:
|
|
58
|
+
direction = direction or [0] * ndim
|
|
59
|
+
if len(direction) != ndim:
|
|
65
60
|
direction = [0] * ndim
|
|
66
61
|
log.warning("Direction list is not the correct dimensions, setting to defaults.")
|
|
67
62
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
elif len(skip) != ndim:
|
|
63
|
+
skip = skip or [0] * ndim
|
|
64
|
+
if len(skip) != ndim:
|
|
71
65
|
skip = [0] * ndim
|
|
72
66
|
log.warning("Skip list is not the correct dimensions, setting to defaults.")
|
|
73
67
|
|
|
@@ -86,71 +80,80 @@ class MABBinMapper(FuncBinMapper):
|
|
|
86
80
|
|
|
87
81
|
super().__init__(map_mab, n_total_bins, kwargs=kwargs)
|
|
88
82
|
|
|
89
|
-
def determine_total_bins(
|
|
83
|
+
def determine_total_bins(
    self, nbins_per_dim: List[int], direction: List[int], skip: List[int], bottleneck: bool, **kwargs
) -> int:
    """
    Calculate the total number of bins needed, taking direction and skipping into account.

    Functional bin mappers have to "reserve" their bins up front and tell the
    sim manager how many they will use, so the count is derived here from the
    direction/skip/bottleneck settings alone.

    Parameters
    ----------
    nbins_per_dim : list of int
        Number of linear bins in each dimension.
    direction : list of int
        Direction code in each dimension. As handled below:
        -1 or 1 adds one boundary bin (plus one bottleneck bin),
        0 adds two boundary bins (plus two bottleneck bins),
        86 adds no boundary bins (only the two bottleneck bins).
        Any other value adds no special bins.
    skip : list of int
        Truthy entries mark dimensions that are skipped; a skipped dimension
        counts as a single linear bin and contributes no special bins.
    bottleneck : bool
        Whether to include separate bins for bottleneck walker(s).
    **kwargs : dict
        Additional MAB parameters (unused).

    Returns
    -------
    n_total_bins : int
        Number of total bins, as a plain Python ``int``.
    """
    # Explicit bool dtype keeps the mask usable for indexing even when it is empty.
    skip = np.array([bool(s) for s in skip], dtype=bool)
    # Skipped dimensions collapse to a single bin (works on a copy of the input).
    nbins_per_dim = np.array(nbins_per_dim)
    nbins_per_dim[skip] = 1

    # Total bins is the product of all linear bins plus any special bins.
    # ``prod`` yields a NumPy scalar; convert so the annotated ``int`` return holds.
    n_total_bins = int(nbins_per_dim.prod())
    for direct, skip_dim in zip(direction, skip):
        if skip_dim:
            continue
        if direct in (-1, 1):
            # 1 lead or lag bin + 1 bottleneck bin
            n_total_bins += 1 + 1 * bottleneck
        elif direct == 0:
            # 2 lead/lag bins + 2 bottleneck bins
            n_total_bins += 2 + 2 * bottleneck
        elif direct == 86:
            # 0 lead/lag bins + 2 bottleneck bins
            n_total_bins += 2 * bottleneck
    return n_total_bins
|
|
126
129
|
|
|
127
130
|
|
|
128
|
-
def map_mab(coords, mask, output, *args, **kwargs):
|
|
131
|
+
def map_mab(coords: np.ndarray, mask: np.ndarray, output: List[int], *args, **kwargs) -> List[int]:
|
|
129
132
|
"""
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
+
Adaptively place bins based on extrema and bottleneck segments along the progress coordinate.
|
|
134
|
+
|
|
135
|
+
Bottleneck segments are where the difference in probability is the greatest
|
|
136
|
+
along the progress coordinate. Operates per dimension (unless skipped) and places a fixed number of
|
|
133
137
|
evenly spaced bins between the segments with the min and max pcoord values. Extrema and
|
|
134
138
|
bottleneck segments are assigned their own bins.
|
|
135
139
|
|
|
136
140
|
Parameters
|
|
137
141
|
----------
|
|
138
|
-
coords : ndarray
|
|
142
|
+
coords : np.ndarray
|
|
139
143
|
An array with pcoord and weight info.
|
|
140
|
-
mask : ndarray
|
|
141
|
-
|
|
144
|
+
mask : np.ndarray
|
|
145
|
+
Boolean array to filter out unwanted segments.
|
|
142
146
|
output : list
|
|
143
147
|
The main list that, for each segment, holds the bin assignment.
|
|
144
148
|
*args : list
|
|
145
|
-
|
|
149
|
+
Additional arguments.
|
|
146
150
|
**kwargs : dict
|
|
147
|
-
|
|
151
|
+
Additional keyword arguments. Contains most of the MAB-needed parameters.
|
|
148
152
|
|
|
149
153
|
Returns
|
|
150
154
|
------
|
|
151
155
|
output : list
|
|
152
|
-
|
|
153
|
-
|
|
156
|
+
List with bin assignments for each segment.
|
|
154
157
|
"""
|
|
155
158
|
|
|
156
159
|
# Argument Processing
|
|
@@ -158,8 +161,8 @@ def map_mab(coords, mask, output, *args, **kwargs):
|
|
|
158
161
|
ndim = len(nbins_per_dim)
|
|
159
162
|
pca = kwargs.get("pca", False)
|
|
160
163
|
bottleneck = kwargs.get("bottleneck", True)
|
|
161
|
-
direction = kwargs.get("direction",
|
|
162
|
-
skip = kwargs.get("skip",
|
|
164
|
+
direction = kwargs.get("direction", [0] * ndim)
|
|
165
|
+
skip = kwargs.get("skip", [0] * ndim)
|
|
163
166
|
mab_log = kwargs.get("mab_log", False)
|
|
164
167
|
bin_log = kwargs.get("bin_log", False)
|
|
165
168
|
bin_log_path = kwargs.get("bin_log_path", "$WEST_SIM_ROOT/binbounds.log")
|
|
@@ -170,8 +173,8 @@ def map_mab(coords, mask, output, *args, **kwargs):
|
|
|
170
173
|
if skip is None:
|
|
171
174
|
skip = [0] * ndim
|
|
172
175
|
|
|
173
|
-
allcoords =
|
|
174
|
-
allmask =
|
|
176
|
+
allcoords = coords.copy()
|
|
177
|
+
allmask = mask.copy()
|
|
175
178
|
|
|
176
179
|
weights = None
|
|
177
180
|
isfinal = None
|
|
@@ -185,11 +188,11 @@ def map_mab(coords, mask, output, *args, **kwargs):
|
|
|
185
188
|
if coords[0, -1] == 0:
|
|
186
189
|
report = True
|
|
187
190
|
if coords.shape[1] > ndim + 1:
|
|
188
|
-
isfinal = allcoords[:, ndim + 1].astype(
|
|
191
|
+
isfinal = allcoords[:, ndim + 1].astype(bool)
|
|
189
192
|
else:
|
|
190
|
-
isfinal = np.ones(coords.shape[0], dtype=
|
|
193
|
+
isfinal = np.ones(coords.shape[0], dtype=bool)
|
|
191
194
|
coords = coords[isfinal, :ndim]
|
|
192
|
-
weights = allcoords[isfinal, ndim
|
|
195
|
+
weights = allcoords[isfinal, ndim]
|
|
193
196
|
mask = mask[isfinal]
|
|
194
197
|
splitting = True
|
|
195
198
|
|
|
@@ -199,229 +202,305 @@ def map_mab(coords, mask, output, *args, **kwargs):
|
|
|
199
202
|
weights = None
|
|
200
203
|
splitting = False
|
|
201
204
|
|
|
202
|
-
varcoords = np.copy(coords)
|
|
203
205
|
originalcoords = np.copy(coords)
|
|
204
206
|
if pca and len(output) > 1:
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
eigvec = eigvec[:, np.argmax(np.absolute(eigvec), axis=1)]
|
|
212
|
-
for i in range(len(eigvec)):
|
|
213
|
-
if eigvec[i, i] < 0:
|
|
214
|
-
eigvec[:, i] = -1 * eigvec[:, i]
|
|
215
|
-
for i in range(ndim):
|
|
216
|
-
for j in range(len(output)):
|
|
217
|
-
coords[j][i] = np.dot(varcoords[j], eigvec[:, i])
|
|
218
|
-
|
|
219
|
-
maxlist = []
|
|
220
|
-
minlist = []
|
|
221
|
-
difflist = []
|
|
222
|
-
flipdifflist = []
|
|
223
|
-
for n in range(ndim):
|
|
224
|
-
# identify the boundary segments
|
|
225
|
-
maxcoord = np.max(coords[mask, n])
|
|
226
|
-
mincoord = np.min(coords[mask, n])
|
|
227
|
-
maxlist.append(maxcoord)
|
|
228
|
-
minlist.append(mincoord)
|
|
229
|
-
|
|
230
|
-
# detect the bottleneck segments, this uses the weights
|
|
231
|
-
if splitting:
|
|
232
|
-
temp = np.column_stack((originalcoords[mask, n], weights[mask]))
|
|
233
|
-
sorted_indices = temp[:, 0].argsort()
|
|
234
|
-
temp = temp[sorted_indices]
|
|
235
|
-
for p in range(len(temp)):
|
|
236
|
-
if temp[p][1] == 0:
|
|
237
|
-
temp[p][1] = 10**-323
|
|
238
|
-
fliptemp = np.flipud(temp)
|
|
239
|
-
|
|
240
|
-
difflist.append(None)
|
|
241
|
-
flipdifflist.append(None)
|
|
242
|
-
maxdiff = 0
|
|
243
|
-
flipmaxdiff = 0
|
|
244
|
-
for i in range(1, len(temp) - 1):
|
|
245
|
-
comprob = 0
|
|
246
|
-
flipcomprob = 0
|
|
247
|
-
j = i + 1
|
|
248
|
-
while j < len(temp):
|
|
249
|
-
comprob = comprob + temp[j][1]
|
|
250
|
-
flipcomprob = flipcomprob + fliptemp[j][1]
|
|
251
|
-
j = j + 1
|
|
252
|
-
diff = -np.log(comprob) + np.log(temp[i][1])
|
|
253
|
-
if diff > maxdiff:
|
|
254
|
-
difflist[n] = temp[i][0]
|
|
255
|
-
maxdiff = diff
|
|
256
|
-
flipdiff = -np.log(flipcomprob) + np.log(fliptemp[i][1])
|
|
257
|
-
if flipdiff > flipmaxdiff:
|
|
258
|
-
flipdifflist[n] = fliptemp[i][0]
|
|
259
|
-
flipmaxdiff = flipdiff
|
|
207
|
+
coords = apply_pca(coords, weights)
|
|
208
|
+
|
|
209
|
+
# Computing special bins (bottleneck and boundary bins)
|
|
210
|
+
minlist, maxlist, bottlenecks_forward, bottlenecks_reverse = calculate_bin_boundaries(
|
|
211
|
+
originalcoords, weights, mask, skip, splitting, bottleneck
|
|
212
|
+
)
|
|
260
213
|
|
|
261
214
|
if mab_log and report:
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
215
|
+
log_mab_stats(minlist, maxlist, direction, skip)
|
|
216
|
+
|
|
217
|
+
# Assign segments to bins
|
|
218
|
+
n_bottleneck_filled = bin_assignment(
|
|
219
|
+
allcoords,
|
|
220
|
+
allmask,
|
|
221
|
+
minlist,
|
|
222
|
+
maxlist,
|
|
223
|
+
bottlenecks_forward,
|
|
224
|
+
bottlenecks_reverse,
|
|
225
|
+
nbins_per_dim,
|
|
226
|
+
direction,
|
|
227
|
+
skip,
|
|
228
|
+
splitting,
|
|
229
|
+
bottleneck,
|
|
230
|
+
output,
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
# Report MAB bin statistics
|
|
234
|
+
if bin_log and report and westpa.rc.sim_manager.n_iter:
|
|
235
|
+
log_bin_boundaries(
|
|
236
|
+
skip,
|
|
237
|
+
bottleneck,
|
|
238
|
+
direction,
|
|
239
|
+
bin_log_path,
|
|
240
|
+
minlist,
|
|
241
|
+
maxlist,
|
|
242
|
+
nbins_per_dim,
|
|
243
|
+
n_bottleneck_filled,
|
|
244
|
+
bottlenecks_forward,
|
|
245
|
+
bottlenecks_reverse,
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
return output
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def apply_pca(coords, weights):
    """
    Project coordinates onto the eigenvectors of their weighted covariance.

    Parameters
    ----------
    coords : np.ndarray
        2-D array with one row per segment and one column per dimension.
    weights : np.ndarray or None
        Passed to ``np.cov`` as ``aweights``; ``None`` means unweighted.

    Returns
    -------
    np.ndarray
        The column-mean-centred coordinates multiplied by the reordered,
        sign-normalised eigenvector matrix (same shape as ``coords``).
    """
    # Centre each column on its (unweighted) mean.
    centered = coords - np.mean(coords, axis=0)

    # For a single dimension np.cov returns a 0-d array, which eigh rejects;
    # promote it to a 1x1 matrix so the one-dimensional case works too.
    covariance = np.atleast_2d(np.cov(centered.T, aweights=weights))
    _, eigvec = np.linalg.eigh(covariance)

    # Reorder the eigenvector columns: for each row, pick the column holding
    # that row's largest-magnitude component.
    eigvec = eigvec[:, np.argmax(np.abs(eigvec), axis=1)]
    # Flip columns so that every diagonal entry is non-negative.
    eigvec[:, np.diag(eigvec) < 0] *= -1
    return np.dot(centered, eigvec)
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def calculate_bin_boundaries(coords, weights, mask, skip, splitting, bottleneck):
    """
    Compute per-dimension minima, maxima and bottleneck segments.

    Parameters
    ----------
    coords : np.ndarray
        2-D array of coordinates, one row per segment.
    weights : np.ndarray or None
        Segment weights; only needed when bottlenecks are detected.
    mask : np.ndarray
        Selects the rows of ``coords`` (and ``weights``) to consider.
        NOTE(review): presumably a boolean array -- confirm against the caller.
    skip : list of int
        Truthy entries disable bottleneck detection for that dimension.
    splitting : bool
        Bottlenecks are only searched for when this and ``bottleneck`` are truthy.
    bottleneck : bool
        See ``splitting``.

    Returns
    -------
    minlist, maxlist : list
        Minimum / maximum of the selected coordinates along every dimension
        (computed even for skipped dimensions).
    bottlenecks_forward, bottlenecks_reverse : list
        One entry per dimension: whatever ``detect_bottlenecks`` returned for
        it, or ``None`` where detection was not run.
    """
    skipped = np.array([bool(flag) for flag in skip])
    ndim = len(coords[0])

    # Extrema per dimension, and placeholders for the bottleneck segments
    minlist = []
    maxlist = []
    bottlenecks_forward = [None] * ndim
    bottlenecks_reverse = [None] * ndim

    # Restrict everything to the selected (unmasked) segments
    n_selected = mask.sum()
    selected_coords = coords[mask, :]
    if weights is None:
        selected_weights = None
    else:
        selected_weights = weights[mask]
        # Swap exact zeros for a tiny positive value so log(weight) stays finite
        selected_weights[selected_weights == 0] = 10**-323

    for dim in range(ndim):
        # Boundary segments are located for every dimension, skipped or not
        column = coords[mask, dim]
        maxlist.append(np.max(column))
        minlist.append(np.min(column))

        # Bottleneck segments only where requested and not skipped
        if splitting and bottleneck and not skipped[dim]:
            forward, reverse = detect_bottlenecks(selected_coords, selected_weights, n_selected, dim)
            bottlenecks_forward[dim] = forward
            bottlenecks_reverse[dim] = reverse

    return minlist, maxlist, bottlenecks_forward, bottlenecks_reverse
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def detect_bottlenecks(unmasked_coords, unmasked_weights, n_coords, n):
    """
    Find the bottleneck segment along dimension ``n`` in both directions.

    Segments are ordered by their coordinate in dimension ``n``. For every
    segment except the first and last in that order, the quantity
    ``Z = log(own weight) - log(summed weight of all segments after it)`` is
    evaluated; the segment with the largest Z is the bottleneck. The scan is
    run once in ascending order and once in descending order.

    Parameters
    ----------
    unmasked_coords : np.ndarray
        2-D array of coordinates, one row per segment.
    unmasked_weights : np.ndarray
        Weight of each segment, aligned with ``unmasked_coords``.
    n_coords : int
        Number of segments to scan.
    n : int
        Index of the dimension to sort along.

    Returns
    -------
    tuple
        ``(forward, reverse)``: the full coordinate row of the bottleneck
        segment for the ascending and the descending scan. Either entry is
        ``None`` when no candidate was evaluated (fewer than three segments).
    """
    # Stable sort, smallest to largest, along the requested dimension
    order = unmasked_coords[:, n].argsort(kind='stable')
    ascending_coords = unmasked_coords[order, :]
    ascending_weights = unmasked_weights[order]

    # The opposite direction is simply the same ordering reversed
    descending_coords = np.flipud(ascending_coords)
    descending_weights = np.flipud(ascending_weights)

    def largest_z_walker(ordered_coords, ordered_weights):
        # Starts as None and stays None if no interior segment exists
        best_row, best_z = None, -np.inf
        # The two boundary segments are excluded from the scan
        for idx in range(1, n_coords - 1):
            # Total weight of every segment ahead of the current one
            weight_ahead = np.sum(ordered_weights[idx + 1 :])
            # Logs are used because weights span many orders of magnitude;
            # a positive Z means less weight ahead than on this segment
            z = np.log(ordered_weights[idx]) - np.log(weight_ahead)
            # Strict comparison: the first segment reaching the maximum wins.
            # The whole coordinate row is kept so the segment can be
            # identified by all of its coordinates.
            if z > best_z:
                best_row, best_z = ordered_coords[idx, :], z
        return best_row

    forward = largest_z_walker(ascending_coords, ascending_weights)
    reverse = largest_z_walker(descending_coords, descending_weights)
    return forward, reverse
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def log_mab_stats(minlist, maxlist, direction, skip):
    """
    Print a short MAB summary through ``westpa.rc.pstatus``.

    Emits a banner, the per-dimension minima, maxima, direction and skip
    settings, a closing banner, and then flushes with ``westpa.rc.pflush``.
    """
    report_lines = (
        "################ MAB stats ################",
        f"minima in each dimension: {minlist}",
        f"maxima in each dimension: {maxlist}",
        f"direction in each dimension: {direction}",
        f"skip in each dimension: {skip}",
        "###########################################",
    )
    for line in report_lines:
        westpa.rc.pstatus(line)
    westpa.rc.pflush()
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def bin_assignment(
    coords,
    mask,
    minlist,
    maxlist,
    bottlenecks_forward,
    bottlenecks_reverse,
    nbins_per_dim,
    direction,
    skip,
    splitting,
    bottleneck,
    output,
):
    """
    Assign segments to bins based on the minima, maxima, and
    bottleneck segments along the progress coordinate.

    Bin ids are laid out in consecutive groups: the linear bins
    (``prod(nbins_per_dim)`` of them, skipped dimensions counting as one bin),
    then the leading-boundary bins, the lagging-boundary bins, the forward
    bottleneck bins and finally the reverse bottleneck bins. Within each
    special group, ids follow dimension order with unused dimensions left out.

    Parameters
    ----------
    coords : np.ndarray
        2-D array, one row per segment; the first ``len(nbins_per_dim)``
        columns are used as the coordinates.
    mask : sequence of bool
        Segments with a falsy entry are left untouched in ``output``.
    minlist, maxlist : list
        Per-dimension minimum / maximum coordinate.
    bottlenecks_forward, bottlenecks_reverse : list
        Per-dimension coordinate row of the bottleneck segment, or ``None``.
    nbins_per_dim : list of int
        Number of linear bins in each dimension.
    direction : list of int
        Direction code per dimension (-1, 0, 1 or 86).
    skip : list of int
        Truthy entries mark skipped dimensions.
    splitting : bool
        Special (boundary / bottleneck) bins are only used when truthy.
    bottleneck : bool
        Whether bottleneck bins are used.
    output : list or np.ndarray
        Filled in place with the bin id of every unmasked segment.

    Returns
    -------
    n_bottleneck_filled : int
        Number of segments placed in a bottleneck bin.
    """
    # Skipped dimensions collapse to one linear bin (on a copy of the input)
    skip = np.array([bool(s) for s in skip])
    nbins_per_dim = np.array(nbins_per_dim)
    nbins_per_dim[skip] = 1
    direction = np.array(direction)

    ndim = len(nbins_per_dim)
    n_bottleneck_filled = 0

    # Boolean arrays marking, per dimension, which special bins are NOT used
    # (adding boolean arrays acts as a logical OR with the skip mask)
    skip_bneck_fwd = np.array([d == -1 if bottleneck else True for d in direction]) + skip
    skip_bneck_rev = np.array([d == 1 if bottleneck else True for d in direction]) + skip
    skip_lead = np.array([d in [86, -1] for d in direction]) + skip
    skip_lag = np.array([d in [86, 1] for d in direction]) + skip

    # Dimensions that are not skipped
    active_dims = [n for n in range(ndim) if not skip[n]]

    # Offsets of the four special-bin groups, stacked after the linear bins
    boundary_bin_id_offset_fwd = nbins_per_dim.prod()
    boundary_bin_id_offset_rev = boundary_bin_id_offset_fwd + (~skip_lead).sum()
    bneck_bin_id_offset_fwd = boundary_bin_id_offset_rev + (~skip_lag).sum()
    bneck_bin_id_offset_rev = bneck_bin_id_offset_fwd + (~skip_bneck_fwd).sum()

    # Everything below depends only on the dimension, not on the walker, so it
    # is computed once here instead of once per walker inside the main loop.
    lead_bin_ids = [boundary_bin_id_offset_fwd + n - skip_lead[:n].sum() for n in range(ndim)]
    lag_bin_ids = [boundary_bin_id_offset_rev + n - skip_lag[:n].sum() for n in range(ndim)]
    bneck_fwd_bin_ids = [bneck_bin_id_offset_fwd + n - skip_bneck_fwd[:n].sum() for n in range(ndim)]
    bneck_rev_bin_ids = [bneck_bin_id_offset_rev + n - skip_bneck_rev[:n].sum() for n in range(ndim)]
    bin_edges = [np.linspace(minlist[n], maxlist[n], nbins_per_dim[n] + 1) for n in range(ndim)]
    strides = [np.prod(nbins_per_dim[:n]) for n in range(ndim)]

    for i in range(len(output)):
        # Masked walkers keep whatever bin id they already have
        if not mask[i]:
            continue

        # ``special`` flags a boundary or bottleneck walker (not in the linear space)
        bin_id, special = 0, False

        # Bottleneck bins take precedence over everything else
        if splitting and bottleneck:
            # A bottleneck walker is identified by its full coordinate row
            coord = coords[i][:ndim]
            for n in active_dims:
                # Directions 0 and 86 use both bottleneck bins; 1 / -1 use one.
                # A walker that is a bottleneck in several dimensions only
                # fills the first matching bin, so some bins may stay empty.
                if (coord == bottlenecks_forward[n]).all() and not skip_bneck_fwd[n]:
                    bin_id = bneck_fwd_bin_ids[n]
                    special = True
                    n_bottleneck_filled += 1
                    break
                elif (coord == bottlenecks_reverse[n]).all() and not skip_bneck_rev[n]:
                    bin_id = bneck_rev_bin_ids[n]
                    special = True
                    n_bottleneck_filled += 1
                    break

        # Boundary walkers are only looked for once bottlenecks are ruled out
        if splitting and not special:
            for n in active_dims:
                coord = coords[i, n]
                if (coord == maxlist[n]) and not skip_lead[n]:
                    bin_id = lead_bin_ids[n]
                    special = True
                    break
                elif (coord == minlist[n]) and not skip_lag[n]:
                    bin_id = lag_bin_ids[n]
                    special = True
                    break

        # Everything else goes into the linear bins
        if not special:
            # Skipped dimensions need no special case: they have exactly one bin
            for n in range(ndim):
                nbins = nbins_per_dim[n]
                # np.digitize is 1-indexed, so after the shift the result lies
                # in [-1, nbins]; both ends are folded into the outermost bins
                # (a walker exactly at the maximum lands on index ``nbins``).
                bin_number = np.digitize(coords[i][n], bin_edges[n]) - 1
                if bin_number >= nbins:
                    bin_number = nbins - 1
                elif bin_number < 0:
                    bin_number = 0

                # Fold the per-dimension index into the flat bin id
                bin_id += bin_number * strides[n]

        # Output is the main list that, for each segment, holds the bin assignment
        output[i] = bin_id
    return n_bottleneck_filled
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
def log_bin_boundaries(
    skip,
    bottleneck,
    direction,
    bin_log_path,
    minlist,
    maxlist,
    nbins_per_dim,
    n_bottleneck_filled,
    bottlenecks_forward,
    bottlenecks_reverse,
):
    """
    Append the MAB bin layout of the current iteration to the bin log file.

    Parameters
    ----------
    skip : list of int
        Truthy entries mark skipped dimensions (left out of the bottleneck report).
    bottleneck : bool
        Whether bottleneck bins are in use; controls the bottleneck section.
    direction : list of int
        Direction code per dimension (-1, 0, 1 or 86).
    bin_log_path : str
        Path of the log file; environment variables are expanded and the
        file is opened in append mode.
    minlist, maxlist : list
        Per-dimension minimum / maximum coordinate.
    nbins_per_dim : list of int
        Number of linear bins in each dimension.
    n_bottleneck_filled : int
        Number of segments that were placed in a bottleneck bin.
    bottlenecks_forward, bottlenecks_reverse : list
        Per-dimension coordinate row of the bottleneck segment, or ``None``
        when none was identified.
    """
    ndim = len(nbins_per_dim)
    skip = [bool(s) for s in skip]
    active_dims = [n for n in range(ndim) if not skip[n]]

    # Directions -1 / 1 use one bottleneck bin, every other direction two.
    # Built-in sum keeps this an int even when no dimension is active.
    max_bottleneck = sum(1 if direction[n] in [-1, 1] else 2 for n in active_dims) if bottleneck else 0

    def _walker_repr(walker):
        # A dimension can legitimately have no bottleneck walker (entry is
        # None); report that instead of failing on list(None).
        return None if walker is None else list(walker)

    with open(expandvars(bin_log_path), 'a') as bb_file:
        # Iteration Number
        bb_file.write(f'Iteration: {westpa.rc.sim_manager.n_iter}\n')
        bb_file.write('MAB linear bin boundaries: ')
        for n in range(ndim):
            # Write binbounds per dim
            bb_file.write(f'{np.linspace(minlist[n], maxlist[n], nbins_per_dim[n] + 1)}\t')
        # Min/Max pcoord
        bb_file.write(f'\nLagging pcoord in each dimension: {minlist}\n')
        bb_file.write(f'Leading pcoord in each dimension: {maxlist}\n')
        # Bottleneck section only when bottleneck bins are in use
        if bottleneck:
            bb_file.write(f'Number of bottleneck bins filled: {n_bottleneck_filled} / {max_bottleneck}\n')
            for n in active_dims:
                if direction[n] in [0, 1, 86]:
                    bb_file.write(f'Dimension {n} forward bottleneck walker at: {_walker_repr(bottlenecks_forward[n])}\n')
                if direction[n] in [0, -1, 86]:
                    bb_file.write(f'Dimension {n} backward bottleneck walker at: {_walker_repr(bottlenecks_reverse[n])}\n')
        # Blank line separates consecutive iterations
        bb_file.write('\n')
|