westpa 2022.10__cp310-cp310-macosx_11_0_arm64.whl → 2022.12__cp310-cp310-macosx_11_0_arm64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of westpa might be problematic. Click here for more details.

Files changed (39) hide show
  1. westpa/_version.py +3 -3
  2. westpa/cli/core/w_truncate.py +15 -6
  3. westpa/cli/tools/w_assign.py +4 -4
  4. westpa/cli/tools/w_fluxanl.py +1 -3
  5. westpa/cli/tools/w_ntop.py +2 -2
  6. westpa/cli/tools/w_red.py +7 -2
  7. westpa/core/binning/_assign.cpython-310-darwin.so +0 -0
  8. westpa/core/binning/assign.py +11 -5
  9. westpa/core/binning/mab.py +352 -273
  10. westpa/core/data_manager.py +3 -3
  11. westpa/core/h5io.py +2 -2
  12. westpa/core/kinetics/_kinetics.cpython-310-darwin.so +0 -0
  13. westpa/core/kinetics/matrates.py +1 -1
  14. westpa/core/propagators/executable.py +11 -7
  15. westpa/core/reweight/_reweight.cpython-310-darwin.so +0 -0
  16. westpa/core/sim_manager.py +9 -4
  17. westpa/core/states.py +7 -7
  18. westpa/core/we_driver.py +4 -2
  19. westpa/fasthist/_fasthist.cpython-310-darwin.so +0 -0
  20. westpa/mclib/__init__.py +10 -3
  21. westpa/mclib/_mclib.cpython-310-darwin.so +0 -0
  22. westpa/oldtools/aframe/mcbs.py +9 -2
  23. westpa/oldtools/aframe/plotting.py +4 -4
  24. westpa/oldtools/cmds/w_ttimes.py +4 -1
  25. westpa/oldtools/stats/edfs.py +1 -1
  26. westpa/oldtools/stats/mcbs.py +9 -2
  27. westpa/trajtree/_trajtree.cpython-310-darwin.so +0 -0
  28. westpa/westext/stringmethod/string_method.py +1 -1
  29. westpa/westext/weed/ProbAdjustEquil.py +2 -2
  30. westpa/westext/weed/weed_driver.py +10 -0
  31. westpa/westext/wess/wess_driver.py +10 -0
  32. {westpa-2022.10.dist-info → westpa-2022.12.dist-info}/AUTHORS +8 -8
  33. {westpa-2022.10.dist-info → westpa-2022.12.dist-info}/METADATA +31 -21
  34. {westpa-2022.10.dist-info → westpa-2022.12.dist-info}/RECORD +38 -39
  35. {westpa-2022.10.dist-info → westpa-2022.12.dist-info}/WHEEL +2 -1
  36. westpa/fasthist/__main__.py +0 -110
  37. {westpa-2022.10.dist-info → westpa-2022.12.dist-info}/LICENSE +0 -0
  38. {westpa-2022.10.dist-info → westpa-2022.12.dist-info}/entry_points.txt +0 -0
  39. {westpa-2022.10.dist-info → westpa-2022.12.dist-info}/top_level.txt +0 -0
@@ -1,73 +1,67 @@
1
1
  import logging
2
+ from typing import List, Optional
2
3
  import numpy as np
3
4
  import westpa
4
5
  from westpa.core.binning import FuncBinMapper
5
6
  from os.path import expandvars
6
7
 
7
-
8
8
  log = logging.getLogger(__name__)
9
9
 
10
10
 
11
11
  class MABBinMapper(FuncBinMapper):
12
12
  """
13
- Adaptively place bins in between minimum and maximum segments along
14
- the progress coordinte. Extrema and bottleneck segments are assigned
13
+ Adaptively place bins between minimum and maximum segments along
14
+ the progress coordinate. Extrema and bottleneck segments are assigned
15
15
  to their own bins.
16
-
17
16
  """
18
17
 
19
18
  def __init__(
20
19
  self,
21
- nbins,
22
- direction=None,
23
- skip=None,
24
- bottleneck=True,
25
- pca=False,
26
- mab_log=False,
27
- bin_log=False,
28
- bin_log_path="$WEST_SIM_ROOT/binbounds.log",
20
+ nbins: List[int],
21
+ direction: Optional[List[int]] = None,
22
+ skip: Optional[List[int]] = None,
23
+ bottleneck: bool = True,
24
+ pca: bool = False,
25
+ mab_log: bool = False,
26
+ bin_log: bool = False,
27
+ bin_log_path: str = "$WEST_SIM_ROOT/binbounds.log",
29
28
  ):
30
29
  """
31
30
  Parameters
32
31
  ----------
33
32
  nbins : list of int
34
- List of int for nbins in each dimension.
35
- direction : Union(list of int, None), default: None
36
- List of int for 'direction' in each dimension.
37
- Direction options are as follows:
33
+ List of number of bins in each dimension.
34
+ direction : Optional[list of int], default: None
35
+ List of directions in each dimension. Direction options:
38
36
  0 : default split at leading and lagging boundaries
39
37
  1 : split at leading boundary only
40
38
  -1 : split at lagging boundary only
41
- 86 : no splitting at either leading or lagging boundary
42
- skip : Union(list of int, None), default: None
43
- List of int for each dimension. Default None for skip=0.
44
- Set to 1 to 'skip' running mab in a dimension.
39
+ 86 : no splitting at either leading or lagging boundary (both bottlenecks included)
40
+ skip : Optional[list of int], default: None
41
+ List of skip flags for each dimension. Default None (no skipping).
45
42
  bottleneck : bool, default: True
46
- Whether to turn on or off bottleneck walker splitting.
43
+ Whether to enable bottleneck walker splitting.
47
44
  pca : bool, default: False
48
- Can be True or False (default) to run PCA on pcoords before bin assignment.
45
+ Whether to perform PCA on progress coordinates before bin assignment.
49
46
  mab_log : bool, default: False
50
- Whether to output mab info to west.log.
47
+ Whether to output MAB info to west.log.
51
48
  bin_log : bool, default: False
52
- Whether to output mab bin boundaries to bin_log_path file.
49
+ Whether to output MAB bin boundaries to a log file.
53
50
  bin_log_path : str, default: "$WEST_SIM_ROOT/binbounds.log"
54
51
  Path to output bin boundaries.
55
-
56
52
  """
57
53
  # Verifying parameters
58
54
  if nbins is None:
59
- raise ValueError("nbins_per_dim is missing")
55
+ raise ValueError("nbins is missing")
60
56
  ndim = len(nbins)
61
57
 
62
- if direction is None:
63
- direction = [0] * ndim
64
- elif len(direction) != ndim:
58
+ direction = direction or [0] * ndim
59
+ if len(direction) != ndim:
65
60
  direction = [0] * ndim
66
61
  log.warning("Direction list is not the correct dimensions, setting to defaults.")
67
62
 
68
- if skip is None:
69
- skip = [0] * ndim
70
- elif len(skip) != ndim:
63
+ skip = skip or [0] * ndim
64
+ if len(skip) != ndim:
71
65
  skip = [0] * ndim
72
66
  log.warning("Skip list is not the correct dimensions, setting to defaults.")
73
67
 
@@ -86,71 +80,80 @@ class MABBinMapper(FuncBinMapper):
86
80
 
87
81
  super().__init__(map_mab, n_total_bins, kwargs=kwargs)
88
82
 
89
- def determine_total_bins(self, nbins_per_dim, direction, skip, bottleneck, **kwargs):
83
+ def determine_total_bins(
84
+ self, nbins_per_dim: List[int], direction: List[int], skip: List[int], bottleneck: bool, **kwargs
85
+ ) -> int:
90
86
  """
91
- The following is neccessary because functional bin mappers need to "reserve"
87
+ Calculate the total number of bins needed, taking direction and skipping into account.
88
+ This function is necessary because functional bin mappers need to "reserve"
92
89
  bins and tell the sim manager how many bins they will need to use, this is
93
90
  determined by taking all direction/skipping info into account.
94
91
 
95
92
  Parameters
96
93
  ----------
97
- nbins_per_dim : int
98
- Number of total bins in each direction.
94
+ nbins_per_dim : list of int
95
+ Number of total bins in each dimension within the linear portion.
99
96
  direction : list of int
100
- Direction in each dimension. See __init__ for more information.
97
+ Direction in each dimension.
101
98
  skip : list of int
102
- List of 0s and 1s indicating whether to skip each dimension.
99
+ List indicating whether to skip each dimension.
103
100
  bottleneck : bool
104
- Whether to include separate bin for bottleneck walker(s).
101
+ Whether to include a separate bin for bottleneck walker(s).
105
102
  **kwargs : dict
106
- Arbitary keyword arguments. Contains unneeded MAB parameters.
103
+ Additional MAB parameters (unused).
107
104
 
108
105
  Returns
109
106
  -------
110
107
  n_total_bins : int
111
108
  Number of total bins.
112
-
113
109
  """
114
- n_total_bins = np.prod(nbins_per_dim)
115
- ndim = len(nbins_per_dim)
116
- for i in range(ndim):
117
- if skip[i] == 0:
118
- if direction[i] != 0:
110
+ # Update nbins_per_dim with any skipped dimensions, setting number of bins along skipped dimensions to 1
111
+ skip = np.array([bool(s) for s in skip])
112
+ nbins_per_dim = np.array(nbins_per_dim)
113
+ nbins_per_dim[skip] = 1
114
+
115
+ # Total bins is the product of all linear bins plus any special bins
116
+ n_total_bins = nbins_per_dim.prod()
117
+ for direct, skip_dim in zip(direction, skip):
118
+ if not skip_dim:
119
+ if direct in [-1, 1]:
120
+ # 1 lead or lag bin + 1 bottleneck bin
119
121
  n_total_bins += 1 + 1 * bottleneck
120
- else:
122
+ elif direct == 0:
123
+ # 2 lead/lag bins + 2 bottleneck bins
121
124
  n_total_bins += 2 + 2 * bottleneck
122
- else:
123
- n_total_bins -= nbins_per_dim[i] - 1
124
- n_total_bins += 1 * ndim # or else it will be one bin short
125
+ elif direct == 86:
126
+ # 0 lead/lag + 2 bottleneck bins
127
+ n_total_bins += 2 * bottleneck
125
128
  return n_total_bins
126
129
 
127
130
 
128
- def map_mab(coords, mask, output, *args, **kwargs):
131
+ def map_mab(coords: np.ndarray, mask: np.ndarray, output: List[int], *args, **kwargs) -> List[int]:
129
132
  """
130
- Binning which adaptively places bins based on the positions of extrema segments and
131
- bottleneck segments, which are where the difference in probability is the greatest
132
- along the progress coordinate. Operates per dimension and places a fixed number of
133
+ Adaptively place bins based on extrema and bottleneck segments along the progress coordinate.
134
+
135
+ Bottleneck segments are where the difference in probability is the greatest
136
+ along the progress coordinate. Operates per dimension (unless skipped) and places a fixed number of
133
137
  evenly spaced bins between the segments with the min and max pcoord values. Extrema and
134
138
  bottleneck segments are assigned their own bins.
135
139
 
136
140
  Parameters
137
141
  ----------
138
- coords : ndarray
142
+ coords : np.ndarray
139
143
  An array with pcoord and weight info.
140
- mask : ndarray
141
- Array of 1 (True) and 0 (False), to filter out unwanted segment info.
144
+ mask : np.ndarray
145
+ Boolean array to filter out unwanted segments.
142
146
  output : list
143
147
  The main list that, for each segment, holds the bin assignment.
144
148
  *args : list
145
- Variable length arguments.
149
+ Additional arguments.
146
150
  **kwargs : dict
147
- Arbitary keyword arguments. Contains most of the MAB-needed parameters.
151
+ Additional keyword arguments. Contains most of the MAB-needed parameters.
148
152
 
149
153
  Returns
150
154
  ------
151
155
  output : list
152
- The main list that, for each segment, holds the bin assignment.
153
-
156
+ List with bin assignments for each segment.
154
157
  """
155
158
 
156
159
  # Argument Processing
@@ -158,8 +161,8 @@ def map_mab(coords, mask, output, *args, **kwargs):
158
161
  ndim = len(nbins_per_dim)
159
162
  pca = kwargs.get("pca", False)
160
163
  bottleneck = kwargs.get("bottleneck", True)
161
- direction = kwargs.get("direction", ([0] * ndim))
162
- skip = kwargs.get("skip", ([0] * ndim))
164
+ direction = kwargs.get("direction", [0] * ndim)
165
+ skip = kwargs.get("skip", [0] * ndim)
163
166
  mab_log = kwargs.get("mab_log", False)
164
167
  bin_log = kwargs.get("bin_log", False)
165
168
  bin_log_path = kwargs.get("bin_log_path", "$WEST_SIM_ROOT/binbounds.log")
@@ -170,8 +173,8 @@ def map_mab(coords, mask, output, *args, **kwargs):
170
173
  if skip is None:
171
174
  skip = [0] * ndim
172
175
 
173
- allcoords = np.copy(coords)
174
- allmask = np.copy(mask)
176
+ allcoords = coords.copy()
177
+ allmask = mask.copy()
175
178
 
176
179
  weights = None
177
180
  isfinal = None
@@ -185,11 +188,11 @@ def map_mab(coords, mask, output, *args, **kwargs):
185
188
  if coords[0, -1] == 0:
186
189
  report = True
187
190
  if coords.shape[1] > ndim + 1:
188
- isfinal = allcoords[:, ndim + 1].astype(np.bool_)
191
+ isfinal = allcoords[:, ndim + 1].astype(bool)
189
192
  else:
190
- isfinal = np.ones(coords.shape[0], dtype=np.bool_)
193
+ isfinal = np.ones(coords.shape[0], dtype=bool)
191
194
  coords = coords[isfinal, :ndim]
192
- weights = allcoords[isfinal, ndim + 0]
195
+ weights = allcoords[isfinal, ndim]
193
196
  mask = mask[isfinal]
194
197
  splitting = True
195
198
 
@@ -199,229 +202,305 @@ def map_mab(coords, mask, output, *args, **kwargs):
199
202
  weights = None
200
203
  splitting = False
201
204
 
202
- varcoords = np.copy(coords)
203
205
  originalcoords = np.copy(coords)
204
206
  if pca and len(output) > 1:
205
- colavg = np.mean(coords, axis=0)
206
- for i in range(len(coords)):
207
- for j in range(len(coords[i])):
208
- varcoords[i][j] = coords[i][j] - colavg[j]
209
- covcoords = np.cov(np.transpose(varcoords), aweights=weights)
210
- eigval, eigvec = np.linalg.eigh(covcoords)
211
- eigvec = eigvec[:, np.argmax(np.absolute(eigvec), axis=1)]
212
- for i in range(len(eigvec)):
213
- if eigvec[i, i] < 0:
214
- eigvec[:, i] = -1 * eigvec[:, i]
215
- for i in range(ndim):
216
- for j in range(len(output)):
217
- coords[j][i] = np.dot(varcoords[j], eigvec[:, i])
218
-
219
- maxlist = []
220
- minlist = []
221
- difflist = []
222
- flipdifflist = []
223
- for n in range(ndim):
224
- # identify the boundary segments
225
- maxcoord = np.max(coords[mask, n])
226
- mincoord = np.min(coords[mask, n])
227
- maxlist.append(maxcoord)
228
- minlist.append(mincoord)
229
-
230
- # detect the bottleneck segments, this uses the weights
231
- if splitting:
232
- temp = np.column_stack((originalcoords[mask, n], weights[mask]))
233
- sorted_indices = temp[:, 0].argsort()
234
- temp = temp[sorted_indices]
235
- for p in range(len(temp)):
236
- if temp[p][1] == 0:
237
- temp[p][1] = 10**-323
238
- fliptemp = np.flipud(temp)
239
-
240
- difflist.append(None)
241
- flipdifflist.append(None)
242
- maxdiff = 0
243
- flipmaxdiff = 0
244
- for i in range(1, len(temp) - 1):
245
- comprob = 0
246
- flipcomprob = 0
247
- j = i + 1
248
- while j < len(temp):
249
- comprob = comprob + temp[j][1]
250
- flipcomprob = flipcomprob + fliptemp[j][1]
251
- j = j + 1
252
- diff = -np.log(comprob) + np.log(temp[i][1])
253
- if diff > maxdiff:
254
- difflist[n] = temp[i][0]
255
- maxdiff = diff
256
- flipdiff = -np.log(flipcomprob) + np.log(fliptemp[i][1])
257
- if flipdiff > flipmaxdiff:
258
- flipdifflist[n] = fliptemp[i][0]
259
- flipmaxdiff = flipdiff
207
+ coords = apply_pca(coords, weights)
208
+
209
+ # Computing special bins (bottleneck and boundary bins)
210
+ minlist, maxlist, bottlenecks_forward, bottlenecks_reverse = calculate_bin_boundaries(
211
+ originalcoords, weights, mask, skip, splitting, bottleneck
212
+ )
260
213
 
261
214
  if mab_log and report:
262
- westpa.rc.pstatus("################ MAB stats ################")
263
- westpa.rc.pstatus("minima in each dimension: {}".format(minlist))
264
- westpa.rc.pstatus("maxima in each dimension: {}".format(maxlist))
265
- westpa.rc.pstatus("direction in each dimension: {}".format(direction))
266
- westpa.rc.pstatus("skip in each dimension: {}".format(skip))
267
- westpa.rc.pstatus("###########################################")
268
- westpa.rc.pflush()
269
-
270
- # assign segments to bins
271
- # the total number of linear bins is the boundary base
272
- boundary_base = np.prod(nbins_per_dim)
273
-
274
- # the bottleneck base is offset by the number of boundary walkers,
275
- # which is two per dimension unless there is a direction specified
276
- # in a particluar dimension, then it's just one
277
- bottleneck_base = boundary_base
215
+ log_mab_stats(minlist, maxlist, direction, skip)
216
+
217
+ # Assign segments to bins
218
+ n_bottleneck_filled = bin_assignment(
219
+ allcoords,
220
+ allmask,
221
+ minlist,
222
+ maxlist,
223
+ bottlenecks_forward,
224
+ bottlenecks_reverse,
225
+ nbins_per_dim,
226
+ direction,
227
+ skip,
228
+ splitting,
229
+ bottleneck,
230
+ output,
231
+ )
232
+
233
+ # Report MAB bin statistics
234
+ if bin_log and report and westpa.rc.sim_manager.n_iter:
235
+ log_bin_boundaries(
236
+ skip,
237
+ bottleneck,
238
+ direction,
239
+ bin_log_path,
240
+ minlist,
241
+ maxlist,
242
+ nbins_per_dim,
243
+ n_bottleneck_filled,
244
+ bottlenecks_forward,
245
+ bottlenecks_reverse,
246
+ )
247
+
248
+ return output
249
+
250
+
251
+ def apply_pca(coords, weights):
252
+ colavg = np.mean(coords, axis=0)
253
+ varcoords = coords - colavg
254
+ covcoords = np.cov(varcoords.T, aweights=weights)
255
+ eigval, eigvec = np.linalg.eigh(covcoords)
256
+ eigvec = eigvec[:, np.argmax(np.abs(eigvec), axis=1)]
257
+ eigvec[:, np.diag(eigvec) < 0] *= -1
258
+ return np.dot(varcoords, eigvec)
259
+
260
+
261
+ def calculate_bin_boundaries(coords, weights, mask, skip, splitting, bottleneck):
262
+ """
263
+ This function calculates minima, maxima, and bottleneck segments.
264
+ """
265
+ skip = np.array([bool(s) for s in skip])
266
+
267
+ # Initialize lists to hold minima and maxima along each dimension
268
+ minlist, maxlist = [], []
269
+ # Initialize lists to hold bottleneck segments along each dimension
270
+ bottlenecks_forward, bottlenecks_reverse = [None] * len(coords[0]), [None] * len(coords[0])
271
+ # number of unmasked coords
272
+ n_coords = mask.sum()
273
+ # Grabbing all unmasked coords and weights
274
+ unmasked_coords = coords[mask, :]
275
+ unmasked_weights = weights[mask] if weights is not None else None
276
+ # Replace any zero weights with non-zero values so that log(weight) is well-defined
277
+ if unmasked_weights is not None:
278
+ unmasked_weights[unmasked_weights == 0] = 10**-323
279
+ # Looping over each dimension of progress coordinate, even those being skipped
280
+ for n in range(len(coords[0])):
281
+ # We calculate the min and max pcoord along each dimension (boundary segments) even if skipping
282
+ maxlist.append(np.max(coords[mask, n]))
283
+ minlist.append(np.min(coords[mask, n]))
284
+ # Now we calculate the bottleneck segments
285
+ if splitting and bottleneck and not skip[n]:
286
+ bottlenecks_forward[n], bottlenecks_reverse[n] = detect_bottlenecks(unmasked_coords, unmasked_weights, n_coords, n)
287
+
288
+ return minlist, maxlist, bottlenecks_forward, bottlenecks_reverse
289
+
290
+
291
+ def detect_bottlenecks(unmasked_coords, unmasked_weights, n_coords, n):
292
+ """
293
+ Detect the bottleneck segments along the given coordinate n, this uses the weights
294
+ """
295
+ # Grabbing all unmasked coords in current dimension, plus corresponding weights
296
+ # Sort by current dimension in coord, smallest to largest
297
+ sorted_indices = unmasked_coords[:, n].argsort(kind='stable')
298
+
299
+ # Grab sorted coords and weights
300
+ coords_srt = unmasked_coords[sorted_indices, :]
301
+ weights_srt = unmasked_weights[sorted_indices]
302
+
303
+ # Also sort in reverse order for opposite direction
304
+ coords_srt_flip = np.flipud(coords_srt)
305
+ weights_srt_flip = np.flipud(weights_srt)
306
+
307
+ # Initialize the max directional differences along current dimension as None (these may not be updated)
308
+ bottleneck_coords, bottleneck_coords_flip = None, None
309
+ maxdiff, maxdiff_flip = -np.inf, -np.inf
310
+
311
+ # Looping through all non-boundary coords
312
+ # Compute the cumulative weight on either side of each non-boundary walker
313
+ for i in range(1, n_coords - 1):
314
+ # Summing up weights of all walkers ahead of current walker along current dim in both directions
315
+ cumulative_prob = np.sum(weights_srt[i + 1 :])
316
+ cumulative_prob_flip = np.sum(weights_srt_flip[i + 1 :])
317
+ # Compute the difference between the log weight of the current walker and the log cumulative weight of all walkers ahead of it (Z in the MAB paper)
318
+ # We use the log as weights vary over many orders of magnitude
319
+ # Note a negative Z indicates the cumulative weight ahead of the current walker is larger than the weight of the current walker,
320
+ # while a positive Z indicates the cumulative weight ahead of the current walker is smaller, indicating a barrier
321
+ Z = np.log(weights_srt[i]) - np.log(cumulative_prob)
322
+ Z_flip = np.log(weights_srt_flip[i]) - np.log(cumulative_prob_flip)
323
+ # Update ALL coords of the current walker into bottlenecks_forward if it is largest
324
+ # This way we uniquely identify a walker by its full set of coordinates
325
+ if Z > maxdiff:
326
+ bottleneck_coords = coords_srt[i, :]
327
+ maxdiff = Z
328
+ if Z_flip > maxdiff_flip:
329
+ bottleneck_coords_flip = coords_srt_flip[i, :]
330
+ maxdiff_flip = Z_flip
331
+ return bottleneck_coords, bottleneck_coords_flip
332
+
333
+
334
+ def log_mab_stats(minlist, maxlist, direction, skip):
335
+ westpa.rc.pstatus("################ MAB stats ################")
336
+ westpa.rc.pstatus(f"minima in each dimension: {minlist}")
337
+ westpa.rc.pstatus(f"maxima in each dimension: {maxlist}")
338
+ westpa.rc.pstatus(f"direction in each dimension: {direction}")
339
+ westpa.rc.pstatus(f"skip in each dimension: {skip}")
340
+ westpa.rc.pstatus("###########################################")
341
+ westpa.rc.pflush()
342
+
343
+
344
+ def bin_assignment(
345
+ coords,
346
+ mask,
347
+ minlist,
348
+ maxlist,
349
+ bottlenecks_forward,
350
+ bottlenecks_reverse,
351
+ nbins_per_dim,
352
+ direction,
353
+ skip,
354
+ splitting,
355
+ bottleneck,
356
+ output,
357
+ ):
358
+ """
359
+ Assign segments to bins based on the minima, maxima, and
360
+ bottleneck segments along the progress coordinate.
361
+ """
362
+ # Update nbins_per_dim with any skipped dimensions, setting number of bins along skipped dimensions to 1
363
+ skip = np.array([bool(s) for s in skip])
364
+ nbins_per_dim = np.array(nbins_per_dim)
365
+ nbins_per_dim[skip] = 1
366
+ direction = np.array(direction)
367
+
368
+ ndim = len(nbins_per_dim)
278
369
  n_bottleneck_filled = 0
279
370
 
280
- for i in range(0, ndim):
281
- # for single direction, 1 boundary walker
282
- if direction[i] == 1 or direction[i] == -1:
283
- bottleneck_base += 1
284
- # 2 boundary walkers with 0 direction
285
- elif direction[i] == 0:
286
- bottleneck_base += 2
287
- # for 86 direction, no boundary walkers so offset of 0
288
- elif direction[i] == 86:
289
- bottleneck_base += 0
290
-
291
- # if a dimension is being "skipped", leave only one bin total as
292
- # the offset
293
- for i in range(0, ndim):
294
- if skip[i] != 0:
295
- boundary_base -= nbins_per_dim[i] - 1
371
+ # Boolean arrays that track use of special bins along each dimension
372
+ skip_bneck_fwd = np.array([d == -1 if bottleneck else True for d in direction]) + skip
373
+ skip_bneck_rev = np.array([d == 1 if bottleneck else True for d in direction]) + skip
374
+ skip_lead = np.array([d in [86, -1] for d in direction]) + skip
375
+ skip_lag = np.array([d in [86, 1] for d in direction]) + skip
296
376
 
297
- for i in range(len(output)):
298
- if not allmask[i]:
299
- continue
377
+ # List of dimensions that are not skipped
378
+ active_dims = np.array([n for n in range(ndim) if not skip[n]])
300
379
 
301
- # special means either a boundary or bottleneck walker (not a walker in the linear space)
302
- special = False
303
- # this holder is the bin number, which only needs to be unique for different walker groups
304
- holder = 0
305
- if splitting:
306
- for n in range(ndim):
307
- coord = allcoords[i][n]
380
+ # Compute the boundary bin ID offsets
381
+ # In forward direction, this is all the linear bins
382
+ boundary_bin_id_offset_fwd = nbins_per_dim.prod()
383
+ # In reverse, we add the number of forward boundary bins to the offset
384
+ boundary_bin_id_offset_rev = boundary_bin_id_offset_fwd + (~skip_lead).sum()
385
+
386
+ # Compute the bottleneck bin ID offsets
387
+ # In forward direction, bin IDs are offset by all linear and boundary bins
388
+ bneck_bin_id_offset_fwd = boundary_bin_id_offset_rev + (~skip_lag).sum()
389
+ # In reverse, we add the number of forward bottleneck bins to the offset
390
+ bneck_bin_id_offset_rev = bneck_bin_id_offset_fwd + (~skip_bneck_fwd).sum()
308
391
 
309
- # if skipped, just assign the walkers to the same bin (offset of boundary base)
310
- if skip[n] != 0:
311
- holder = boundary_base + n
392
+ # Bin assignment loop over all walkers
393
+ for i in range(len(output)):
394
+ # Skip masked walkers; their bin IDs are left unchanged
395
+ if not mask[i]:
396
+ continue
397
+ # Initialize bin ID and special tracker for current coord
398
+ # The special variable indicates a boundary or bottleneck walker (not assigned to the linear space)
399
+ bin_id, special = 0, False
400
+
401
+ # Searching for bottleneck bins first
402
+ if splitting and bottleneck:
403
+ for n in active_dims:
404
+ # Grab coord(s) of current walker
405
+ coord = coords[i][:ndim]
406
+ # Assign bottlenecks, taking directionality into account
407
+ # Check both directions when using 0 or 86
408
+ # Note: 86 implies no leading or lagging bins, but does add bottlenecks for *both* directions when bottleneck is enabled
409
+ # Note: All bottleneck bins will typically be filled unless a walker is simultaneously in bottleneck bins along multiple dimensions
410
+ # or there are too few walkers to compute free energy barriers
411
+ if (coord == bottlenecks_forward[n]).all() and not skip_bneck_fwd[n]:
412
+ bin_id = bneck_bin_id_offset_fwd + n - skip_bneck_fwd[:n].sum()
413
+ special = True
414
+ n_bottleneck_filled += 1
415
+ break
416
+ elif (coord == bottlenecks_reverse[n]).all() and not skip_bneck_rev[n]:
417
+ bin_id = bneck_bin_id_offset_rev + n - skip_bneck_rev[:n].sum()
418
+ special = True
419
+ n_bottleneck_filled += 1
312
420
  break
313
421
 
314
- # assign bottlenecks, taking directionality into account
315
- if bottleneck:
316
- if direction[n] == -1:
317
- if coord == flipdifflist[n]:
318
- holder = bottleneck_base + n
319
- special = True
320
- n_bottleneck_filled += 1
321
- break
322
-
323
- if direction[n] == 1:
324
- if coord == difflist[n]:
325
- holder = bottleneck_base + n
326
- special = True
327
- n_bottleneck_filled += 1
328
- break
329
-
330
- # both directions when using 0 or with
331
- # special value of 86 for no lead/lag split
332
- if direction[n] == 0 or direction[n] == 86:
333
- if coord == difflist[n]:
334
- holder = bottleneck_base + n
335
- special = True
336
- n_bottleneck_filled += 1
337
- break
338
- elif coord == flipdifflist[n]:
339
- holder = bottleneck_base + n + 1
340
- special = True
341
- n_bottleneck_filled += 1
342
- break
343
-
344
- # assign boundary walkers, taking directionality into account
345
- if direction[n] == -1:
346
- if coord == minlist[n]:
347
- holder = boundary_base + n
348
- special = True
349
- break
350
-
351
- elif direction[n] == 1:
352
- if coord == maxlist[n]:
353
- holder = boundary_base + n
354
- special = True
355
- break
356
-
357
- elif direction[n] == 0:
358
- if coord == minlist[n]:
359
- holder = boundary_base + n
360
- special = True
361
- break
362
- elif coord == maxlist[n]:
363
- holder = boundary_base + n + 1
364
- special = True
365
- break
366
-
367
- # special value for direction with no lead/lag split
368
- elif direction[n] == 86:
369
- # westpa.rc.pstatus(f"No lead/lag split for dim {n}")
370
- # westpa.rc.pflush()
371
- # nornmally adds to special bin but here just leaving it forever empty
372
- # holder = boundary_base + n
422
+ # Now check for boundary walkers, taking directionality into account
423
+ # This should only be done after fully checking for bottleneck walkers
424
+ if splitting and not special:
425
+ for n in active_dims:
426
+ # Grab coord of current walker along current dimension
427
+ coord = coords[i, n]
428
+ if (coord == maxlist[n]) and not skip_lead[n]:
429
+ bin_id = boundary_bin_id_offset_fwd + n - skip_lead[:n].sum()
430
+ special = True
431
+ break
432
+ elif (coord == minlist[n]) and not skip_lag[n]:
433
+ bin_id = boundary_bin_id_offset_rev + n - skip_lag[:n].sum()
434
+ special = True
373
435
  break
374
436
 
375
- # the following are for the "linear" portion
437
+ # Now check for linear bin walkers
376
438
  if not special:
439
+ # Again we loop over the dimensions
440
+ # Note: no need to worry about skipping as we've already set all skipped dimensions to 1 bin
377
441
  for n in range(ndim):
378
- # if skipped, it's added to the same bin as the special walkers above
379
- if skip[n] != 0:
380
- holder = boundary_base + n
381
- break
382
-
383
- coord = allcoords[i][n]
442
+ coord = coords[i][n]
384
443
  nbins = nbins_per_dim[n]
385
444
  minp = minlist[n]
386
445
  maxp = maxlist[n]
387
446
 
447
+ # Generate the bins along this dimension
388
448
  bins = np.linspace(minp, maxp, nbins + 1)
389
- bin_number = np.digitize(coord, bins) - 1
390
-
391
- if isfinal is None or not isfinal[i]:
392
- if bin_number >= nbins:
393
- bin_number = nbins - 1
394
- elif bin_number < 0:
395
- bin_number = 0
396
- elif bin_number >= nbins or bin_number < 0:
397
- if np.isclose(bins[-1], coord):
398
- bin_number = nbins - 1
399
- elif np.isclose(bins[0], coord):
400
- bin_number = 0
401
- else:
402
- raise ValueError("Walker out of boundary")
403
-
404
- holder += bin_number * np.prod(nbins_per_dim[:n])
405
-
406
- # output is the main list that, for each segment, holds the bin assignment
407
- output[i] = holder
408
-
409
- if bin_log and report:
410
- if westpa.rc.sim_manager.n_iter:
411
- with open(expandvars(bin_log_path), 'a') as bb_file:
412
- # Iteration Number
413
- bb_file.write(f'iteration: {westpa.rc.sim_manager.n_iter}\n')
414
- bb_file.write('bin boundaries: ')
415
- for n in range(ndim):
416
- # Write binbounds per dim
417
- bb_file.write(f'{np.linspace(minlist[n], maxlist[n], nbins_per_dim[n] + 1)}\t')
418
- # Min/Max pcoord
419
- bb_file.write(f'\nmin/max pcoord: {minlist} {maxlist}\n')
420
- bb_file.write(f'bottleneck bins: {n_bottleneck_filled}\n')
421
- if n_bottleneck_filled > 0:
422
- # Bottlenecks bins exist (passes any of the if bottleneck: checks)
423
- bb_file.write(f'bottleneck pcoord: {flipdifflist} {difflist}\n\n')
424
- else:
425
- bb_file.write('\n')
426
449
 
427
- return output
450
+ # Assign walker to a bin along this dimension
451
+ bin_number = np.digitize(coord, bins) - 1 # note np.digitize is 1-indexed
452
+
453
+ # Sometimes the walker is exactly at the max/min value,
454
+ # which would put it in the next bin
455
+ if bin_number == nbins:
456
+ bin_number -= 1
457
+ elif bin_number == -1:
458
+ bin_number = 0
459
+ elif bin_number > nbins or bin_number < -1:
460
+ raise ValueError("Walker out of boundary.")
461
+
462
+ # Assign to bin within the full dimensional space
463
+ bin_id += bin_number * np.prod(nbins_per_dim[:n])
464
+
465
+ # Output is the main list that, for each segment, holds the bin assignment
466
+ output[i] = bin_id
467
+ return n_bottleneck_filled
468
+
469
+
470
+ def log_bin_boundaries(
471
+ skip,
472
+ bottleneck,
473
+ direction,
474
+ bin_log_path,
475
+ minlist,
476
+ maxlist,
477
+ nbins_per_dim,
478
+ n_bottleneck_filled,
479
+ bottlenecks_forward,
480
+ bottlenecks_reverse,
481
+ ):
482
+ ndim = len(nbins_per_dim)
483
+ skip = np.array([bool(s) for s in skip])
484
+ active_dims = np.array([n for n in range(ndim) if not skip[n]])
485
+ max_bottleneck = np.sum([1 if direction[n] in [-1, 1] else 2 for n in active_dims]) if bottleneck else 0
486
+ with open(expandvars(bin_log_path), 'a') as bb_file:
487
+ # Iteration Number
488
+ bb_file.write(f'Iteration: {westpa.rc.sim_manager.n_iter}\n')
489
+ bb_file.write('MAB linear bin boundaries: ')
490
+ for n in range(ndim):
491
+ # Write binbounds per dim
492
+ bb_file.write(f'{np.linspace(minlist[n], maxlist[n], nbins_per_dim[n] + 1)}\t')
493
+ # Min/Max pcoord
494
+ bb_file.write(f'\nLagging pcoord in each dimension: {minlist}\n')
495
+ bb_file.write(f'Leading pcoord in each dimension: {maxlist}\n')
496
+ # Bottlenecks bins exist
497
+ if bottleneck:
498
+ bb_file.write(f'Number of bottleneck bins filled: {n_bottleneck_filled} / {max_bottleneck}\n')
499
+ for n in active_dims:
500
+ if direction[n] in [0, 1, 86]:
501
+ bb_file.write(f'Dimension {n} forward bottleneck walker at: {list(bottlenecks_forward[n])}\n')
502
+ if direction[n] in [0, -1, 86]:
503
+ bb_file.write(f'Dimension {n} backward bottleneck walker at: {list(bottlenecks_reverse[n])}\n')
504
+ bb_file.write('\n')
505
+ else:
506
+ bb_file.write('\n')