westpa 2022.10__cp312-cp312-macosx_10_9_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of westpa might be problematic. Click here for more details.
- westpa/__init__.py +14 -0
- westpa/_version.py +21 -0
- westpa/analysis/__init__.py +5 -0
- westpa/analysis/core.py +746 -0
- westpa/analysis/statistics.py +27 -0
- westpa/analysis/trajectories.py +360 -0
- westpa/cli/__init__.py +0 -0
- westpa/cli/core/__init__.py +0 -0
- westpa/cli/core/w_fork.py +152 -0
- westpa/cli/core/w_init.py +230 -0
- westpa/cli/core/w_run.py +77 -0
- westpa/cli/core/w_states.py +212 -0
- westpa/cli/core/w_succ.py +99 -0
- westpa/cli/core/w_truncate.py +59 -0
- westpa/cli/tools/__init__.py +0 -0
- westpa/cli/tools/ploterr.py +506 -0
- westpa/cli/tools/plothist.py +706 -0
- westpa/cli/tools/w_assign.py +596 -0
- westpa/cli/tools/w_bins.py +166 -0
- westpa/cli/tools/w_crawl.py +119 -0
- westpa/cli/tools/w_direct.py +547 -0
- westpa/cli/tools/w_dumpsegs.py +94 -0
- westpa/cli/tools/w_eddist.py +506 -0
- westpa/cli/tools/w_fluxanl.py +378 -0
- westpa/cli/tools/w_ipa.py +833 -0
- westpa/cli/tools/w_kinavg.py +127 -0
- westpa/cli/tools/w_kinetics.py +96 -0
- westpa/cli/tools/w_multi_west.py +414 -0
- westpa/cli/tools/w_ntop.py +213 -0
- westpa/cli/tools/w_pdist.py +515 -0
- westpa/cli/tools/w_postanalysis_matrix.py +82 -0
- westpa/cli/tools/w_postanalysis_reweight.py +53 -0
- westpa/cli/tools/w_red.py +486 -0
- westpa/cli/tools/w_reweight.py +780 -0
- westpa/cli/tools/w_select.py +226 -0
- westpa/cli/tools/w_stateprobs.py +111 -0
- westpa/cli/tools/w_trace.py +599 -0
- westpa/core/__init__.py +0 -0
- westpa/core/_rc.py +673 -0
- westpa/core/binning/__init__.py +55 -0
- westpa/core/binning/_assign.cpython-312-darwin.so +0 -0
- westpa/core/binning/assign.py +449 -0
- westpa/core/binning/binless.py +96 -0
- westpa/core/binning/binless_driver.py +54 -0
- westpa/core/binning/binless_manager.py +190 -0
- westpa/core/binning/bins.py +47 -0
- westpa/core/binning/mab.py +427 -0
- westpa/core/binning/mab_driver.py +54 -0
- westpa/core/binning/mab_manager.py +198 -0
- westpa/core/data_manager.py +1694 -0
- westpa/core/extloader.py +74 -0
- westpa/core/h5io.py +995 -0
- westpa/core/kinetics/__init__.py +24 -0
- westpa/core/kinetics/_kinetics.cpython-312-darwin.so +0 -0
- westpa/core/kinetics/events.py +147 -0
- westpa/core/kinetics/matrates.py +156 -0
- westpa/core/kinetics/rate_averaging.py +266 -0
- westpa/core/progress.py +218 -0
- westpa/core/propagators/__init__.py +54 -0
- westpa/core/propagators/executable.py +715 -0
- westpa/core/reweight/__init__.py +14 -0
- westpa/core/reweight/_reweight.cpython-312-darwin.so +0 -0
- westpa/core/reweight/matrix.py +126 -0
- westpa/core/segment.py +119 -0
- westpa/core/sim_manager.py +830 -0
- westpa/core/states.py +359 -0
- westpa/core/systems.py +93 -0
- westpa/core/textio.py +74 -0
- westpa/core/trajectory.py +330 -0
- westpa/core/we_driver.py +908 -0
- westpa/core/wm_ops.py +43 -0
- westpa/core/yamlcfg.py +391 -0
- westpa/fasthist/__init__.py +34 -0
- westpa/fasthist/__main__.py +110 -0
- westpa/fasthist/_fasthist.cpython-312-darwin.so +0 -0
- westpa/mclib/__init__.py +264 -0
- westpa/mclib/__main__.py +28 -0
- westpa/mclib/_mclib.cpython-312-darwin.so +0 -0
- westpa/oldtools/__init__.py +4 -0
- westpa/oldtools/aframe/__init__.py +35 -0
- westpa/oldtools/aframe/atool.py +75 -0
- westpa/oldtools/aframe/base_mixin.py +26 -0
- westpa/oldtools/aframe/binning.py +178 -0
- westpa/oldtools/aframe/data_reader.py +560 -0
- westpa/oldtools/aframe/iter_range.py +200 -0
- westpa/oldtools/aframe/kinetics.py +117 -0
- westpa/oldtools/aframe/mcbs.py +146 -0
- westpa/oldtools/aframe/output.py +39 -0
- westpa/oldtools/aframe/plotting.py +90 -0
- westpa/oldtools/aframe/trajwalker.py +126 -0
- westpa/oldtools/aframe/transitions.py +469 -0
- westpa/oldtools/cmds/__init__.py +0 -0
- westpa/oldtools/cmds/w_ttimes.py +358 -0
- westpa/oldtools/files.py +34 -0
- westpa/oldtools/miscfn.py +23 -0
- westpa/oldtools/stats/__init__.py +4 -0
- westpa/oldtools/stats/accumulator.py +35 -0
- westpa/oldtools/stats/edfs.py +129 -0
- westpa/oldtools/stats/mcbs.py +89 -0
- westpa/tools/__init__.py +33 -0
- westpa/tools/binning.py +472 -0
- westpa/tools/core.py +340 -0
- westpa/tools/data_reader.py +159 -0
- westpa/tools/dtypes.py +31 -0
- westpa/tools/iter_range.py +198 -0
- westpa/tools/kinetics_tool.py +340 -0
- westpa/tools/plot.py +283 -0
- westpa/tools/progress.py +17 -0
- westpa/tools/selected_segs.py +154 -0
- westpa/tools/wipi.py +751 -0
- westpa/trajtree/__init__.py +4 -0
- westpa/trajtree/_trajtree.cpython-312-darwin.so +0 -0
- westpa/trajtree/trajtree.py +117 -0
- westpa/westext/__init__.py +0 -0
- westpa/westext/adaptvoronoi/__init__.py +3 -0
- westpa/westext/adaptvoronoi/adaptVor_driver.py +214 -0
- westpa/westext/hamsm_restarting/__init__.py +3 -0
- westpa/westext/hamsm_restarting/example_overrides.py +35 -0
- westpa/westext/hamsm_restarting/restart_driver.py +1165 -0
- westpa/westext/stringmethod/__init__.py +11 -0
- westpa/westext/stringmethod/fourier_fitting.py +69 -0
- westpa/westext/stringmethod/string_driver.py +253 -0
- westpa/westext/stringmethod/string_method.py +306 -0
- westpa/westext/weed/BinCluster.py +180 -0
- westpa/westext/weed/ProbAdjustEquil.py +100 -0
- westpa/westext/weed/UncertMath.py +247 -0
- westpa/westext/weed/__init__.py +10 -0
- westpa/westext/weed/weed_driver.py +182 -0
- westpa/westext/wess/ProbAdjust.py +101 -0
- westpa/westext/wess/__init__.py +6 -0
- westpa/westext/wess/wess_driver.py +207 -0
- westpa/work_managers/__init__.py +57 -0
- westpa/work_managers/core.py +396 -0
- westpa/work_managers/environment.py +134 -0
- westpa/work_managers/mpi.py +318 -0
- westpa/work_managers/processes.py +187 -0
- westpa/work_managers/serial.py +28 -0
- westpa/work_managers/threads.py +79 -0
- westpa/work_managers/zeromq/__init__.py +20 -0
- westpa/work_managers/zeromq/core.py +641 -0
- westpa/work_managers/zeromq/node.py +131 -0
- westpa/work_managers/zeromq/work_manager.py +526 -0
- westpa/work_managers/zeromq/worker.py +320 -0
- westpa-2022.10.dist-info/AUTHORS +22 -0
- westpa-2022.10.dist-info/LICENSE +21 -0
- westpa-2022.10.dist-info/METADATA +183 -0
- westpa-2022.10.dist-info/RECORD +150 -0
- westpa-2022.10.dist-info/WHEEL +5 -0
- westpa-2022.10.dist-info/entry_points.txt +29 -0
- westpa-2022.10.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Kinetics analysis library
|
|
3
|
+
'''
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
from .rate_averaging import RateAverager # noqa
|
|
8
|
+
|
|
9
|
+
from . import _kinetics # noqa
|
|
10
|
+
from ._kinetics import ( # noqa
|
|
11
|
+
calculate_labeled_fluxes,
|
|
12
|
+
labeled_flux_to_rate,
|
|
13
|
+
calculate_labeled_fluxes_alllags,
|
|
14
|
+
nested_to_flat_matrix,
|
|
15
|
+
nested_to_flat_vector,
|
|
16
|
+
flat_to_nested_matrix,
|
|
17
|
+
flat_to_nested_vector,
|
|
18
|
+
find_macrostate_transitions,
|
|
19
|
+
sequence_macro_flux_to_rate,
|
|
20
|
+
)
|
|
21
|
+
from .events import WKinetics # noqa
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
log = logging.getLogger(__name__)
|
|
Binary file
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from westpa.core.data_manager import weight_dtype
|
|
6
|
+
from westpa.core import h5io
|
|
7
|
+
|
|
8
|
+
# From w_kinetics.
|
|
9
|
+
from westpa.tools.dtypes import ed_list_dtype
|
|
10
|
+
from westpa.core.binning import index_dtype
|
|
11
|
+
from westpa.core.kinetics._kinetics import _fast_transition_state_copy # @UnresolvedImport
|
|
12
|
+
from westpa.core.kinetics import find_macrostate_transitions
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
warnings.filterwarnings('ignore', category=DeprecationWarning)
|
|
16
|
+
warnings.filterwarnings('ignore', category=RuntimeWarning)
|
|
17
|
+
warnings.filterwarnings('ignore', category=FutureWarning)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# The old w_kinetics
|
|
21
|
+
class WKinetics:
    '''Provider of the trajectory-tracing kinetics calculation (the old ``w_kinetics``).

    ``w_kinetics`` reads ``progress``, ``data_reader``, ``assignments_file``,
    ``output_file``, ``iter_range`` and ``do_compression`` from ``self`` and calls
    ``self.open_files()``; the host object is expected to supply all of them.
    '''

    def w_kinetics(self):
        '''Trace trajectories over ``[iter_start, iter_stop)`` and store per-iteration kinetics.

        For each iteration, conditional/total fluxes, conditional/total arrival counts
        and event durations are obtained via ``find_macrostate_transitions`` and written
        to the ``conditional_fluxes``, ``total_fluxes``, ``conditional_arrivals``,
        ``arrivals``, ``durations`` and ``duration_count`` datasets of the output file.
        The state labels of the assignments file are copied alongside for convenience.
        '''
        indicator = self.progress.indicator
        indicator.new_operation('Initializing')

        self.data_reader.open('r')
        self.open_files()
        assignments = self.assignments_file
        output = self.output_file

        nstates = assignments.attrs['nstates']
        start_iter = self.iter_range.iter_start
        stop_iter = self.iter_range.iter_stop
        iter_count = stop_iter - start_iter

        compress = self.do_compression
        level = 9 if compress else None
        per_state = (iter_count, nstates)
        per_state_pair = (iter_count, nstates, nstates)

        def chunks_for(shape, dtype):
            # Explicit chunking is only requested when compression is enabled
            return h5io.calc_chunksize(shape, dtype) if compress else None

        # Durations are ragged per iteration, so the second axis starts empty and grows on demand
        durations_ds = output.replace_dataset(
            'durations',
            shape=(iter_count, 0),
            maxshape=(iter_count, None),
            dtype=ed_list_dtype,
            chunks=(1, 15360) if compress else None,
            shuffle=compress,
            compression=level,
        )
        durations_count_ds = output.replace_dataset(
            'duration_count', shape=(iter_count,), dtype=np.int_, shuffle=True, compression=9
        )
        cond_fluxes_ds = output.replace_dataset(
            'conditional_fluxes',
            shape=per_state_pair,
            dtype=weight_dtype,
            chunks=chunks_for(per_state_pair, weight_dtype),
            shuffle=compress,
            compression=level,
        )
        total_fluxes_ds = output.replace_dataset(
            'total_fluxes',
            shape=per_state,
            dtype=weight_dtype,
            chunks=chunks_for(per_state, weight_dtype),
            shuffle=compress,
            compression=level,
        )

        cond_arrival_counts_ds = output.replace_dataset(
            'conditional_arrivals',
            shape=per_state_pair,
            dtype=np.uint,
            chunks=chunks_for(per_state_pair, np.uint),
            shuffle=compress,
            compression=level,
        )
        arrival_counts_ds = output.replace_dataset(
            'arrivals',
            shape=per_state,
            dtype=np.uint,
            chunks=chunks_for(per_state, np.uint),
            shuffle=compress,
            compression=level,
        )

        # Copy state labels for convenience
        output.replace_dataset('state_labels', data=assignments['state_labels'][...])

        # Record the iteration range on the file and on the flux/count datasets
        for target in (output, durations_count_ds, cond_fluxes_ds, total_fluxes_ds):
            h5io.stamp_iter_range(target, start_iter, stop_iter)

        # Calculate instantaneous rate matrices and trace trajectories
        last_state = None
        indicator.new_operation('Tracing trajectories', iter_count)
        for iiter, n_iter in enumerate(range(start_iter, stop_iter)):
            # Data from the main HDF5 file
            iter_group = self.data_reader.get_iter_group(n_iter)
            seg_index = iter_group['seg_index']
            nsegs, npts = iter_group['pcoord'].shape[0:2]
            weights = seg_index['weight']
            parent_ids = self.data_reader.parent_id_dsspec.get_iter_data(n_iter)

            # Bin and trajectory-ensemble assignments from the previously generated assignments file
            assignment_iiter = h5io.get_iteration_entry(assignments, n_iter)
            window = assignment_iiter + np.s_[:nsegs, :npts]
            bin_assignments = np.require(assignments['assignments'][window], dtype=index_dtype)
            label_assignments = np.require(assignments['trajlabels'][window], dtype=index_dtype)
            state_assignments = np.require(assignments['statelabels'][window], dtype=index_dtype)

            # Per-iteration accumulators handed to the tracing routine
            cond_fluxes = np.zeros((nstates, nstates), weight_dtype)
            total_fluxes = np.zeros((nstates,), weight_dtype)
            cond_counts = np.zeros((nstates, nstates), np.uint)
            total_counts = np.zeros((nstates,), np.uint)
            durations = []

            # Estimate macrostate fluxes and calculate event durations using trajectory tracing;
            # ``state`` is opaque to the find_macrostate_transitions function
            dt = 1.0 / (npts - 1) if npts != 1 else 1.0
            state = _fast_transition_state_copy(iiter, nstates, parent_ids, last_state)
            # The return value is ignored; the accumulators above are stored afterwards,
            # so they are presumably filled in place -- TODO confirm against _kinetics
            find_macrostate_transitions(
                nstates,
                weights,
                label_assignments,
                state_assignments,
                dt,
                state,
                cond_fluxes,
                cond_counts,
                total_fluxes,
                total_counts,
                durations,
            )
            last_state = state

            # Store trace-based kinetics data
            cond_fluxes_ds[iiter] = cond_fluxes
            total_fluxes_ds[iiter] = total_fluxes
            arrival_counts_ds[iiter] = total_counts
            cond_arrival_counts_ds[iiter] = cond_counts

            n_durations = len(durations)
            durations_count_ds[iiter] = n_durations
            if n_durations > 0:
                # Widen the ragged axis only when this iteration needs more room
                durations_ds.resize((iter_count, max(n_durations, durations_ds.shape[1])))
                durations_ds[iiter, :n_durations] = durations

            # Do a little manual clean-up to prevent memory explosion
            del iter_group, weights, parent_ids, bin_assignments, label_assignments, state, cond_fluxes, total_fluxes
            indicator.progress += 1
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Routines for implementing Lettieri et al.'s macrostate-to-macrostate rate calculations
|
|
3
|
+
using extrapolation to steady-state populations from average rate matrices
|
|
4
|
+
|
|
5
|
+
Internally, "labeled" objects (bin populations labeled by history, rate matrix elements labeled
|
|
6
|
+
by history) are stored as nested arrays -- e.g. rates[initial_label, final_label, initial_bin, final_bin].
|
|
7
|
+
These are converted to the flat forms required for, say, eigenvalue calculations internally, and the
|
|
8
|
+
results converted back. This is because these conversions are not expensive, and it saves users of
|
|
9
|
+
this code from having to know how the flattened indexing works (something I screwed up all too
|
|
10
|
+
easily during development) -- mcz
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import logging
|
|
14
|
+
import warnings
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
import scipy.linalg
|
|
18
|
+
|
|
19
|
+
from westpa.core.data_manager import weight_dtype
|
|
20
|
+
|
|
21
|
+
from ._kinetics import (
|
|
22
|
+
calculate_labeled_fluxes, # @UnresolvedImport
|
|
23
|
+
calculate_labeled_fluxes_alllags, # @UnresolvedImport
|
|
24
|
+
labeled_flux_to_rate, # @UnresolvedImport
|
|
25
|
+
nested_to_flat_matrix,
|
|
26
|
+
nested_to_flat_vector, # @UnresolvedImport
|
|
27
|
+
flat_to_nested_vector,
|
|
28
|
+
_reduce_labeled_rate_matrix_to_macro,
|
|
29
|
+
) # @UnresolvedImport
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
log = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ConsistencyWarning(UserWarning):
    '''Warning category used by this module for inconsistent rate/population data.'''
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def get_steady_state(rates):
    '''Get steady state solution for a rate matrix.

    A copy of ``rates`` is row-normalized into a transition probability matrix
    (the caller's array is left untouched), and the eigenvector of its transpose
    whose eigenvalue has the largest magnitude is returned, made non-negative and
    normalized to sum to one.

    As an optimization, returns the flattened labeled population vector (of length
    nstates*nbins); to convert to the nested vector used for storage, use
    flat_to_nested_vector().

    Returns ``None`` if the eigenvalue decomposition raises.

    Emits a ``ConsistencyWarning`` for every row that has no outgoing rate but a
    non-zero column sum (a sink microstate).
    '''

    rates = rates.copy()

    # Convert to a transition probability matrix
    for i in range(rates.shape[0]):
        rowsum = rates[i, :].sum()
        if rowsum > 0:
            rates[i, :] = rates[i, :] / rowsum
        else:
            # Nothing leaves this microstate; if something enters it, it is a sink
            if rates[:, i].sum() != 0:
                warnings.warn('sink microstate in rate matrix', ConsistencyWarning)
            rates[i, :] = 0

    try:
        vals, vecs = scipy.linalg.eig(rates.T)
    except Exception:
        # Deliberately best-effort: callers treat None as "no well-defined steady state"
        log.debug('exception obtaining eigenvectors', exc_info=True)
        return None

    vals = np.abs(vals)
    # Only sort/format the eigenvalues when the message will actually be emitted
    if log.isEnabledFor(logging.DEBUG):
        log.debug('eigenvalues: %r', sorted(vals, reverse=True))

    # Eigenvector belonging to the eigenvalue of largest magnitude
    asort = np.argsort(vals)
    vec = vecs[:, asort[-1]]
    ss = np.abs(vec)

    ss /= ss.sum()
    return ss
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def get_macrostate_rates(labeled_rates, labeled_pops, extrapolate=True):
    '''Compute a population distribution and the state-to-state rates derived from it.

    With ``extrapolate`` true, the populations come from ``get_steady_state()`` applied
    to the flattened labeled rate matrix; if that yields ``None`` a ``ConsistencyWarning``
    is issued and the flattened ``labeled_pops`` are used instead. With ``extrapolate``
    false, the flattened ``labeled_pops`` are used directly.

    Returns ``(ss, macro_rates)``, where ``ss`` is the probability distribution in nested
    form and ``macro_rates`` is the state-to-state rate matrix.
    '''

    nstates, nbins = labeled_pops.shape
    flat_rates = nested_to_flat_matrix(labeled_rates)

    # Find steady-state solution, falling back to the supplied populations
    steady = get_steady_state(flat_rates) if extrapolate else None
    if steady is None:
        if extrapolate:
            warnings.warn('no well-defined steady state; using average populations', ConsistencyWarning)
        steady = nested_to_flat_vector(labeled_pops)

    macro_rates = _reduce_labeled_rate_matrix_to_macro(nstates, nbins, flat_rates, steady)
    nested_steady = flat_to_nested_vector(nstates, nbins, steady)

    return nested_steady, macro_rates
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def estimate_rates(
    nbins,
    state_labels,
    weights,
    parent_ids,
    bin_assignments,
    label_assignments,
    state_map,
    labeled_pops,
    all_lags=False,
    labeled_fluxes=None,
    labeled_rates=None,
    unlabeled_rates=None,
):
    '''Estimate fluxes and rates over multiple iterations. The number of iterations is set by how many
    vectors of weights, parent IDs, bin assignments, and label assignments are passed (all four
    sequences must have equal length).

    If ``all_lags`` is true, fluxes come from ``calculate_labeled_fluxes_alllags`` (average over all
    possible lags within the length-N window given); otherwise from ``calculate_labeled_fluxes``
    (simply the length N lag).

    ``labeled_fluxes``, ``labeled_rates`` and ``unlabeled_rates`` may be passed as preallocated
    output arrays; they are zeroed before use. Arrays that are not passed are allocated here.

    Note that the ``nbins`` argument is not used: it is replaced by ``labeled_pops.shape[1] - 1``.
    ``state_map`` is not referenced by this function.

    Returns labeled flux matrix, labeled rate matrix, and unlabeled rate matrix.'''

    assert len(weights) == len(parent_ids) == len(bin_assignments) == len(label_assignments)
    nstates = len(state_labels)
    nbins = labeled_pops.shape[1] - 1

    # Prepare output arrays: reuse (after zeroing) what the caller supplied, allocate the rest
    if labeled_fluxes is not None:
        labeled_fluxes.fill(0.0)
    else:
        labeled_fluxes = np.zeros((nstates, nstates, nbins, nbins), weight_dtype)

    if labeled_rates is not None:
        labeled_rates.fill(0.0)
    else:
        labeled_rates = np.zeros_like(labeled_fluxes)

    if unlabeled_rates is not None:
        unlabeled_rates.fill(0.0)
    else:
        unlabeled_rates = np.zeros((nbins, nbins), weight_dtype)

    # Accumulate the flux matrix over the window;
    # flux matrix is [initial_label][final_label][initial_bin][final_bin]
    accumulate_fluxes = calculate_labeled_fluxes_alllags if all_lags else calculate_labeled_fluxes
    twindow = accumulate_fluxes(nstates, weights, parent_ids, bin_assignments, label_assignments, labeled_fluxes)
    labeled_fluxes /= twindow

    # Calculate rate matrix for this window, using populations from the last iteration (which correspond
    # to the weights that contribute to the flux matrix)
    labeled_flux_to_rate(labeled_fluxes, labeled_pops, labeled_rates)

    # Calculate an unlabeled rate matrix by summing out both label axes
    unlabeled_fluxes = np.sum(labeled_fluxes, axis=(0, 1))
    unlabeled_pops = labeled_pops.sum(axis=0)
    single_label_rates = labeled_flux_to_rate(unlabeled_fluxes[None, None, :, :], unlabeled_pops[None, :])
    unlabeled_rates[...] = single_label_rates[0, 0]

    return labeled_fluxes, labeled_rates, unlabeled_rates
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
from collections import namedtuple
|
|
2
|
+
from itertools import zip_longest
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
import westpa
|
|
7
|
+
from westpa.core.kinetics._kinetics import (
|
|
8
|
+
flux_assign,
|
|
9
|
+
pop_assign,
|
|
10
|
+
calc_rates,
|
|
11
|
+
StreamingStats1D,
|
|
12
|
+
StreamingStats2D,
|
|
13
|
+
) # @UnresolvedImport
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Named tuple proxy for StreamingStats class
|
|
17
|
+
StreamingStatsTuple = namedtuple('StreamingStatsTuple', ['M1', 'M2', 'n'])
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def grouper(n, iterable, fillvalue=None):
    '''Collect data into fixed-length chunks or blocks of ``n`` items.

    The final chunk is padded with ``fillvalue``, e.g.
    ``grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx``.
    '''
    # One shared iterator referenced n times: each output tuple pulls n consecutive items
    shared = iter(iterable)
    return zip_longest(*([shared] * n), fillvalue=fillvalue)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def tuple2stats(stat_tuple):
    '''Rebuild a ``StreamingStats1D``/``StreamingStats2D`` object from its named-tuple proxy.

    The dimensionality is taken from ``stat_tuple.M1``; ``M1``, ``M2`` and ``n`` are then
    copied onto a freshly constructed statistics object, which is returned.
    '''
    m1 = stat_tuple.M1
    ndims = m1.ndim
    assert ndims in (1, 2)

    if ndims == 1:
        stats = StreamingStats1D(m1.shape[0])
    elif ndims == 2:
        stats = StreamingStats2D(m1.shape)
    else:
        # Only reachable when assertions are disabled
        raise ValueError

    stats.M1 = stat_tuple.M1
    stats.M2 = stat_tuple.M2
    stats.n = stat_tuple.n

    return stats
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def process_iter_chunk(bin_mapper, iter_indices, iter_data=None):
    '''Calculate the flux matrices and populations of a set of iterations specified
    by iter_indices. Optionally provide the necessary arrays to perform the calculation
    in iter_data (a dict keyed by iteration group name). Otherwise get data from the
    data_manager directly.

    Returns three ``StreamingStatsTuple`` objects: flux, rate and population statistics.
    '''

    data_manager = westpa.rc.get_data_manager()
    system = westpa.rc.get_system_driver()

    nbins = bin_mapper.nbins
    pair_shape = (nbins, nbins)

    flux_stats = StreamingStats2D(pair_shape)
    rate_stats = StreamingStats2D(pair_shape)
    pop_stats = StreamingStats1D(nbins)

    # All-zero masks for the flux/population updates; rate_mask is handed to calc_rates
    nomask1d = np.zeros((nbins,), np.uint8)
    nomask2d = np.zeros(pair_shape, np.uint8)
    rate_mask = np.zeros(pair_shape, np.uint8)

    # Work arrays reused across iterations
    flux_matrix = np.zeros(pair_shape, np.float64)
    rate_matrix = np.zeros(pair_shape, np.float64)
    population_vector = np.zeros((nbins,), np.float64)

    pcoord_len = system.pcoord_len
    assign = bin_mapper.assign

    for n_iter in iter_indices:
        flux_matrix.fill(0.0)
        population_vector.fill(0.0)

        if iter_data:
            group_name = 'iter_{:0{prec}d}'.format(int(n_iter), prec=data_manager.iter_prec)
            iter_group = iter_data[group_name]
        else:
            iter_group = data_manager.get_iter_group(n_iter)

        # First, account for the flux due to recycling.
        # We access the hdf5 file directly to avoid nearly 50% overhead of creating a ton of
        # tiny newweightentry objects
        try:
            nwgroup = iter_group['new_weights']
        except KeyError:
            # no new weight data
            nwgroup = None

        if nwgroup is not None:
            if iter_data:
                weights = nwgroup['weight']
                prev_init_pcoords = nwgroup['prev_init_pcoord']
                new_init_pcoords = nwgroup['new_init_pcoord']
            else:
                weights = nwgroup['index'][...]['weight']
                prev_init_pcoords = nwgroup['prev_init_pcoord'][...]
                new_init_pcoords = nwgroup['new_init_pcoord'][...]

            prev_init_assignments = assign(prev_init_pcoords)
            new_init_assignments = assign(new_init_pcoords)

            flux_assign(weights, prev_init_assignments, new_init_assignments, flux_matrix)
            del prev_init_pcoords, new_init_pcoords, prev_init_assignments, new_init_assignments, weights

        # Then the flux and populations of the segments themselves
        if iter_data:
            weights = iter_group['weight']
            initial_pcoords = iter_group['initial_pcoords']
            final_pcoords = iter_group['final_pcoords']
        else:
            weights = iter_group['seg_index']['weight']
            pcoords = iter_group['pcoord']
            initial_pcoords = pcoords[:, 0]
            final_pcoords = pcoords[:, pcoord_len - 1]
            del pcoords

        initial_assignments = assign(initial_pcoords)
        final_assignments = assign(final_pcoords)

        flux_assign(weights, initial_assignments, final_assignments, flux_matrix)
        pop_assign(weights, initial_assignments, population_vector)

        flux_stats.update(flux_matrix, nomask2d)
        pop_stats.update(population_vector, nomask1d)

        calc_rates(flux_matrix, population_vector, rate_matrix, rate_mask)
        rate_stats.update(rate_matrix, rate_mask)

        del weights, initial_assignments, final_assignments, initial_pcoords, final_pcoords, iter_group

    def as_tuple(stats):
        # Named-tuple proxy for a cython StreamingStats object, since the typed memoryview
        # class variables do not seem to return cleanly from the zmq workers
        return StreamingStatsTuple(stats.M1, stats.M2, stats.n)

    c_flux_stats = as_tuple(flux_stats)
    c_rate_stats = as_tuple(rate_stats)
    c_pop_stats = as_tuple(pop_stats)

    return c_flux_stats, c_rate_stats, c_pop_stats
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class RateAverager:
    '''Calculate bin-to-bin kinetic properties (fluxes, rates, populations) at
    1-tau resolution'''

    def __init__(self, bin_mapper, system=None, data_manager=None, work_manager=None):
        '''Store the bin mapper and resolve collaborators.

        Any of ``system``, ``data_manager`` and ``work_manager`` that is not given
        (or is falsy) is fetched from ``westpa.rc``.
        '''
        self.bin_mapper = bin_mapper
        self.data_manager = data_manager if data_manager else westpa.rc.get_data_manager()
        self.system = system if system else westpa.rc.get_system_driver()
        self.work_manager = work_manager if work_manager else westpa.rc.get_work_manager()

    def extract_data(self, iter_indices):
        '''Extract data from the data_manager and place it in a dict mirroring the same
        underlying layout.

        The result maps each iteration group name to a dict holding ``weight``,
        ``initial_pcoords`` and ``final_pcoords``, plus a ``new_weights`` sub-dict when
        the iteration group has a ``new_weights`` entry.
        '''

        extracted = {}
        last_point = self.system.pcoord_len - 1

        for n_iter in iter_indices:
            group_name = 'iter_{:0{prec}d}'.format(int(n_iter), prec=self.data_manager.iter_prec)
            iter_group = self.data_manager.get_iter_group(n_iter)
            entry = {}
            extracted[group_name] = entry

            try:
                nwgroup = iter_group['new_weights']
            except KeyError:
                # no new weight data
                pass
            else:
                recycled = {}
                entry['new_weights'] = recycled
                recycled['weight'] = nwgroup['index'][...]['weight']
                recycled['prev_init_pcoord'] = nwgroup['prev_init_pcoord'][...]
                recycled['new_init_pcoord'] = nwgroup['new_init_pcoord'][...]

            entry['weight'] = iter_group['seg_index']['weight']
            entry['initial_pcoords'] = iter_group['pcoord'][:, 0]
            entry['final_pcoords'] = iter_group['pcoord'][:, last_point]

        return extracted

    def task_generator(self, iter_start, iter_stop, block_size):
        '''Yield ``(function, args, kwargs)`` tasks, one per block of up to ``block_size`` iterations.'''
        for padded_block in grouper(block_size, range(iter_start, iter_stop)):
            # grouper pads the last block with None; drop the padding
            iter_block = [n_iter for n_iter in padded_block if n_iter is not None]
            iter_data = self.extract_data(iter_block)
            yield (process_iter_chunk, (self.bin_mapper, iter_block), {'iter_data': iter_data})
            del iter_data

    def calculate(self, iter_start=None, iter_stop=None, n_blocks=1, queue_size=1):
        '''Read the HDF5 file and collect flux matrices and population vectors
        for each bin for each iteration in the range [iter_start, iter_stop). Break
        the calculation into n_blocks blocks. If the calculation is broken up into
        more than one block, queue_size specifies the maximum number of tasks in
        the work queue.

        ``iter_start`` defaults to 1 and ``iter_stop`` to the data manager's current
        iteration. Results are stored on the instance as ``average_flux``/``stderr_flux``,
        ``average_populations``/``stderr_populations`` and ``average_rate``/``stderr_rate``.
        '''

        iter_start = iter_start or 1
        iter_stop = iter_stop or self.data_manager.current_iteration

        itercount = iter_stop - iter_start
        block_size = max(1, itercount // n_blocks)
        nbins = self.bin_mapper.nbins

        if n_blocks != 1:
            flux_stats = StreamingStats2D((nbins, nbins))
            rate_stats = StreamingStats2D((nbins, nbins))
            population_stats = StreamingStats1D(nbins)

            tasks = self.task_generator(iter_start, iter_stop, block_size)

            for future in self.work_manager.submit_as_completed(tasks, queue_size):
                chunk_flux_stats_t, chunk_rate_stats_t, chunk_pop_stats_t = future.get_result()

                # Update statistics with chunked subsets
                flux_stats += tuple2stats(chunk_flux_stats_t)
                rate_stats += tuple2stats(chunk_rate_stats_t)
                population_stats += tuple2stats(chunk_pop_stats_t)
        else:
            # Single block: process everything in this process, reading from the data manager
            whole_range = list(range(iter_start, iter_stop))
            flux_stats_t, rate_stats_t, population_stats_t = process_iter_chunk(self.bin_mapper, whole_range)

            flux_stats = tuple2stats(flux_stats_t)
            rate_stats = tuple2stats(rate_stats_t)
            population_stats = tuple2stats(population_stats_t)

        def stderr_of(stats):
            # NOTE(review): this divides the standard deviation by n rather than sqrt(n);
            # confirm against the definition of ``var`` in _kinetics before relying on it
            return np.nan_to_num(np.sqrt(stats.var) / stats.n)

        self.average_flux = flux_stats.mean
        self.stderr_flux = stderr_of(flux_stats)

        self.average_populations = population_stats.mean
        self.stderr_populations = stderr_of(population_stats)

        self.average_rate = rate_stats.mean
        self.stderr_rate = stderr_of(rate_stats)

        assert not np.any(np.isinf(self.stderr_flux))
        assert not np.any(np.isinf(self.stderr_rate))
        assert not np.any(np.isinf(self.stderr_populations))
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
if __name__ == '__main__':
    # Tests this file on the west.h5 data in the current (sim root) directory
    westpa.rc.read_config()
    system = westpa.rc.get_system_driver()
    data_manager = westpa.rc.get_data_manager()
    data_manager.open_backing('r')
    averager = RateAverager(system.bin_mapper)
    averager.calculate()

    # Each report is a heading followed by the mean and its standard error
    reports = (
        ('Population mean and standard error', averager.average_populations, averager.stderr_populations),
        ('Flux matrix, mean and standard error', averager.average_flux, averager.stderr_flux),
        ('Rate matrix, mean and standard error', averager.average_rate, averager.stderr_rate),
    )
    for heading, mean, stderr in reports:
        print(heading)
        print(mean)
        print(stderr)
|