westpa 2022.10__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of westpa might be problematic. Click here for more details.

Files changed (150) hide show
  1. westpa/__init__.py +14 -0
  2. westpa/_version.py +21 -0
  3. westpa/analysis/__init__.py +5 -0
  4. westpa/analysis/core.py +746 -0
  5. westpa/analysis/statistics.py +27 -0
  6. westpa/analysis/trajectories.py +360 -0
  7. westpa/cli/__init__.py +0 -0
  8. westpa/cli/core/__init__.py +0 -0
  9. westpa/cli/core/w_fork.py +152 -0
  10. westpa/cli/core/w_init.py +230 -0
  11. westpa/cli/core/w_run.py +77 -0
  12. westpa/cli/core/w_states.py +212 -0
  13. westpa/cli/core/w_succ.py +99 -0
  14. westpa/cli/core/w_truncate.py +59 -0
  15. westpa/cli/tools/__init__.py +0 -0
  16. westpa/cli/tools/ploterr.py +506 -0
  17. westpa/cli/tools/plothist.py +706 -0
  18. westpa/cli/tools/w_assign.py +596 -0
  19. westpa/cli/tools/w_bins.py +166 -0
  20. westpa/cli/tools/w_crawl.py +119 -0
  21. westpa/cli/tools/w_direct.py +547 -0
  22. westpa/cli/tools/w_dumpsegs.py +94 -0
  23. westpa/cli/tools/w_eddist.py +506 -0
  24. westpa/cli/tools/w_fluxanl.py +378 -0
  25. westpa/cli/tools/w_ipa.py +833 -0
  26. westpa/cli/tools/w_kinavg.py +127 -0
  27. westpa/cli/tools/w_kinetics.py +96 -0
  28. westpa/cli/tools/w_multi_west.py +414 -0
  29. westpa/cli/tools/w_ntop.py +213 -0
  30. westpa/cli/tools/w_pdist.py +515 -0
  31. westpa/cli/tools/w_postanalysis_matrix.py +82 -0
  32. westpa/cli/tools/w_postanalysis_reweight.py +53 -0
  33. westpa/cli/tools/w_red.py +486 -0
  34. westpa/cli/tools/w_reweight.py +780 -0
  35. westpa/cli/tools/w_select.py +226 -0
  36. westpa/cli/tools/w_stateprobs.py +111 -0
  37. westpa/cli/tools/w_trace.py +599 -0
  38. westpa/core/__init__.py +0 -0
  39. westpa/core/_rc.py +673 -0
  40. westpa/core/binning/__init__.py +55 -0
  41. westpa/core/binning/_assign.cpython-312-x86_64-linux-gnu.so +0 -0
  42. westpa/core/binning/assign.py +449 -0
  43. westpa/core/binning/binless.py +96 -0
  44. westpa/core/binning/binless_driver.py +54 -0
  45. westpa/core/binning/binless_manager.py +190 -0
  46. westpa/core/binning/bins.py +47 -0
  47. westpa/core/binning/mab.py +427 -0
  48. westpa/core/binning/mab_driver.py +54 -0
  49. westpa/core/binning/mab_manager.py +198 -0
  50. westpa/core/data_manager.py +1694 -0
  51. westpa/core/extloader.py +74 -0
  52. westpa/core/h5io.py +995 -0
  53. westpa/core/kinetics/__init__.py +24 -0
  54. westpa/core/kinetics/_kinetics.cpython-312-x86_64-linux-gnu.so +0 -0
  55. westpa/core/kinetics/events.py +147 -0
  56. westpa/core/kinetics/matrates.py +156 -0
  57. westpa/core/kinetics/rate_averaging.py +266 -0
  58. westpa/core/progress.py +218 -0
  59. westpa/core/propagators/__init__.py +54 -0
  60. westpa/core/propagators/executable.py +715 -0
  61. westpa/core/reweight/__init__.py +14 -0
  62. westpa/core/reweight/_reweight.cpython-312-x86_64-linux-gnu.so +0 -0
  63. westpa/core/reweight/matrix.py +126 -0
  64. westpa/core/segment.py +119 -0
  65. westpa/core/sim_manager.py +830 -0
  66. westpa/core/states.py +359 -0
  67. westpa/core/systems.py +93 -0
  68. westpa/core/textio.py +74 -0
  69. westpa/core/trajectory.py +330 -0
  70. westpa/core/we_driver.py +908 -0
  71. westpa/core/wm_ops.py +43 -0
  72. westpa/core/yamlcfg.py +391 -0
  73. westpa/fasthist/__init__.py +34 -0
  74. westpa/fasthist/__main__.py +110 -0
  75. westpa/fasthist/_fasthist.cpython-312-x86_64-linux-gnu.so +0 -0
  76. westpa/mclib/__init__.py +264 -0
  77. westpa/mclib/__main__.py +28 -0
  78. westpa/mclib/_mclib.cpython-312-x86_64-linux-gnu.so +0 -0
  79. westpa/oldtools/__init__.py +4 -0
  80. westpa/oldtools/aframe/__init__.py +35 -0
  81. westpa/oldtools/aframe/atool.py +75 -0
  82. westpa/oldtools/aframe/base_mixin.py +26 -0
  83. westpa/oldtools/aframe/binning.py +178 -0
  84. westpa/oldtools/aframe/data_reader.py +560 -0
  85. westpa/oldtools/aframe/iter_range.py +200 -0
  86. westpa/oldtools/aframe/kinetics.py +117 -0
  87. westpa/oldtools/aframe/mcbs.py +146 -0
  88. westpa/oldtools/aframe/output.py +39 -0
  89. westpa/oldtools/aframe/plotting.py +90 -0
  90. westpa/oldtools/aframe/trajwalker.py +126 -0
  91. westpa/oldtools/aframe/transitions.py +469 -0
  92. westpa/oldtools/cmds/__init__.py +0 -0
  93. westpa/oldtools/cmds/w_ttimes.py +358 -0
  94. westpa/oldtools/files.py +34 -0
  95. westpa/oldtools/miscfn.py +23 -0
  96. westpa/oldtools/stats/__init__.py +4 -0
  97. westpa/oldtools/stats/accumulator.py +35 -0
  98. westpa/oldtools/stats/edfs.py +129 -0
  99. westpa/oldtools/stats/mcbs.py +89 -0
  100. westpa/tools/__init__.py +33 -0
  101. westpa/tools/binning.py +472 -0
  102. westpa/tools/core.py +340 -0
  103. westpa/tools/data_reader.py +159 -0
  104. westpa/tools/dtypes.py +31 -0
  105. westpa/tools/iter_range.py +198 -0
  106. westpa/tools/kinetics_tool.py +340 -0
  107. westpa/tools/plot.py +283 -0
  108. westpa/tools/progress.py +17 -0
  109. westpa/tools/selected_segs.py +154 -0
  110. westpa/tools/wipi.py +751 -0
  111. westpa/trajtree/__init__.py +4 -0
  112. westpa/trajtree/_trajtree.cpython-312-x86_64-linux-gnu.so +0 -0
  113. westpa/trajtree/trajtree.py +117 -0
  114. westpa/westext/__init__.py +0 -0
  115. westpa/westext/adaptvoronoi/__init__.py +3 -0
  116. westpa/westext/adaptvoronoi/adaptVor_driver.py +214 -0
  117. westpa/westext/hamsm_restarting/__init__.py +3 -0
  118. westpa/westext/hamsm_restarting/example_overrides.py +35 -0
  119. westpa/westext/hamsm_restarting/restart_driver.py +1165 -0
  120. westpa/westext/stringmethod/__init__.py +11 -0
  121. westpa/westext/stringmethod/fourier_fitting.py +69 -0
  122. westpa/westext/stringmethod/string_driver.py +253 -0
  123. westpa/westext/stringmethod/string_method.py +306 -0
  124. westpa/westext/weed/BinCluster.py +180 -0
  125. westpa/westext/weed/ProbAdjustEquil.py +100 -0
  126. westpa/westext/weed/UncertMath.py +247 -0
  127. westpa/westext/weed/__init__.py +10 -0
  128. westpa/westext/weed/weed_driver.py +182 -0
  129. westpa/westext/wess/ProbAdjust.py +101 -0
  130. westpa/westext/wess/__init__.py +6 -0
  131. westpa/westext/wess/wess_driver.py +207 -0
  132. westpa/work_managers/__init__.py +57 -0
  133. westpa/work_managers/core.py +396 -0
  134. westpa/work_managers/environment.py +134 -0
  135. westpa/work_managers/mpi.py +318 -0
  136. westpa/work_managers/processes.py +187 -0
  137. westpa/work_managers/serial.py +28 -0
  138. westpa/work_managers/threads.py +79 -0
  139. westpa/work_managers/zeromq/__init__.py +20 -0
  140. westpa/work_managers/zeromq/core.py +641 -0
  141. westpa/work_managers/zeromq/node.py +131 -0
  142. westpa/work_managers/zeromq/work_manager.py +526 -0
  143. westpa/work_managers/zeromq/worker.py +320 -0
  144. westpa-2022.10.dist-info/AUTHORS +22 -0
  145. westpa-2022.10.dist-info/LICENSE +21 -0
  146. westpa-2022.10.dist-info/METADATA +183 -0
  147. westpa-2022.10.dist-info/RECORD +150 -0
  148. westpa-2022.10.dist-info/WHEEL +6 -0
  149. westpa-2022.10.dist-info/entry_points.txt +29 -0
  150. westpa-2022.10.dist-info/top_level.txt +1 -0
@@ -0,0 +1,24 @@
1
+ '''
2
+ Kinetics analysis library
3
+ '''
4
+
5
+ import logging
6
+
7
+ from .rate_averaging import RateAverager # noqa
8
+
9
+ from . import _kinetics # noqa
10
+ from ._kinetics import ( # noqa
11
+ calculate_labeled_fluxes,
12
+ labeled_flux_to_rate,
13
+ calculate_labeled_fluxes_alllags,
14
+ nested_to_flat_matrix,
15
+ nested_to_flat_vector,
16
+ flat_to_nested_matrix,
17
+ flat_to_nested_vector,
18
+ find_macrostate_transitions,
19
+ sequence_macro_flux_to_rate,
20
+ )
21
+ from .events import WKinetics # noqa
22
+
23
+
24
# Logger for this package, named after the package's import path (``__name__``).
log = logging.getLogger(__name__)
@@ -0,0 +1,147 @@
1
+ import warnings
2
+
3
+ import numpy as np
4
+
5
+ from westpa.core.data_manager import weight_dtype
6
+ from westpa.core import h5io
7
+
8
+ # From w_kinetics.
9
+ from westpa.tools.dtypes import ed_list_dtype
10
+ from westpa.core.binning import index_dtype
11
+ from westpa.core.kinetics._kinetics import _fast_transition_state_copy # @UnresolvedImport
12
+ from westpa.core.kinetics import find_macrostate_transitions
13
+
14
+
15
+ warnings.filterwarnings('ignore', category=DeprecationWarning)
16
+ warnings.filterwarnings('ignore', category=RuntimeWarning)
17
+ warnings.filterwarnings('ignore', category=FutureWarning)
18
+
19
+
20
+ # The old w_kinetics
21
class WKinetics:
    '''Mixin providing the trajectory-tracing kinetics analysis of the old ``w_kinetics`` tool.

    :meth:`w_kinetics` reads the following attributes from the host object:
    ``progress``, ``data_reader``, ``iter_range``, ``assignments_file``,
    ``output_file`` and ``do_compression``; it also calls ``open_files()``.
    '''

    def w_kinetics(self):
        '''Trace trajectories and store per-iteration macrostate kinetics.

        For every iteration in ``[iter_range.iter_start, iter_range.iter_stop)``
        the segment weights and parent IDs are read from the main data file,
        the trajectory-label and state-label assignments are read from the
        assignments file, and ``find_macrostate_transitions`` is run on them.

        Results are written to the following datasets of ``output_file``
        (each one is replaced if it already exists):

        * ``durations`` / ``duration_count`` -- event durations per iteration
          and how many were recorded,
        * ``conditional_fluxes`` / ``total_fluxes`` -- flux accumulators,
        * ``conditional_arrivals`` / ``arrivals`` -- arrival counters,
        * ``state_labels`` -- copied verbatim from the assignments file.

        Returns ``None``.
        '''
        pi = self.progress.indicator
        pi.new_operation('Initializing')

        self.data_reader.open('r')
        self.open_files()
        nstates = self.assignments_file.attrs['nstates']
        start_iter, stop_iter = self.iter_range.iter_start, self.iter_range.iter_stop
        iter_count = stop_iter - start_iter

        # Compression-dependent storage options shared by most datasets below.
        do_compression = self.do_compression
        compression = 9 if do_compression else None

        def chunks_for(shape, dtype):
            # Explicit chunking is only requested when compression is enabled.
            return h5io.calc_chunksize(shape, dtype) if do_compression else None

        cond_shape = (iter_count, nstates, nstates)
        total_shape = (iter_count, nstates)

        # The second axis of ``durations`` starts empty and is grown on demand
        # in the main loop.
        durations_ds = self.output_file.replace_dataset(
            'durations',
            shape=(iter_count, 0),
            maxshape=(iter_count, None),
            dtype=ed_list_dtype,
            chunks=(1, 15360) if do_compression else None,
            shuffle=do_compression,
            compression=compression,
        )
        durations_count_ds = self.output_file.replace_dataset(
            'duration_count', shape=(iter_count,), dtype=np.int_, shuffle=True, compression=9
        )
        cond_fluxes_ds = self.output_file.replace_dataset(
            'conditional_fluxes',
            shape=cond_shape,
            dtype=weight_dtype,
            chunks=chunks_for(cond_shape, weight_dtype),
            shuffle=do_compression,
            compression=compression,
        )
        total_fluxes_ds = self.output_file.replace_dataset(
            'total_fluxes',
            shape=total_shape,
            dtype=weight_dtype,
            chunks=chunks_for(total_shape, weight_dtype),
            shuffle=do_compression,
            compression=compression,
        )

        cond_arrival_counts_ds = self.output_file.replace_dataset(
            'conditional_arrivals',
            shape=cond_shape,
            dtype=np.uint,
            chunks=chunks_for(cond_shape, np.uint),
            shuffle=do_compression,
            compression=compression,
        )
        arrival_counts_ds = self.output_file.replace_dataset(
            'arrivals',
            shape=total_shape,
            dtype=np.uint,
            chunks=chunks_for(total_shape, np.uint),
            shuffle=do_compression,
            compression=compression,
        )

        # copy state labels for convenience
        self.output_file.replace_dataset('state_labels', data=self.assignments_file['state_labels'][...])

        # Put nice labels on things
        for ds in (self.output_file, durations_count_ds, cond_fluxes_ds, total_fluxes_ds):
            h5io.stamp_iter_range(ds, start_iter, stop_iter)

        # Calculate instantaneous rate matrices and trace trajectories
        last_state = None
        pi.new_operation('Tracing trajectories', iter_count)
        for iiter, n_iter in enumerate(range(start_iter, stop_iter)):
            # Get data from the main HDF5 file
            iter_group = self.data_reader.get_iter_group(n_iter)
            seg_index = iter_group['seg_index']
            nsegs, npts = iter_group['pcoord'].shape[0:2]
            weights = seg_index['weight']
            parent_ids = self.data_reader.parent_id_dsspec.get_iter_data(n_iter)

            # Get traj. ensemble and state assignments from the previously-generated
            # assignments file.  Only these two are passed to the tracing routine,
            # so the per-bin 'assignments' dataset is not read here.
            assignment_iiter = h5io.get_iteration_entry(self.assignments_file, n_iter)
            selection = assignment_iiter + np.s_[:nsegs, :npts]
            label_assignments = np.require(self.assignments_file['trajlabels'][selection], dtype=index_dtype)
            state_assignments = np.require(self.assignments_file['statelabels'][selection], dtype=index_dtype)

            # Prepare to run analysis
            cond_fluxes = np.zeros((nstates, nstates), weight_dtype)
            total_fluxes = np.zeros((nstates,), weight_dtype)
            cond_counts = np.zeros((nstates, nstates), np.uint)
            total_counts = np.zeros((nstates,), np.uint)
            durations = []

            # Estimate macrostate fluxes and calculate event durations using trajectory tracing
            # state is opaque to the find_macrostate_transitions function
            dt = 1.0 if npts == 1 else 1.0 / (npts - 1)
            state = _fast_transition_state_copy(iiter, nstates, parent_ids, last_state)
            find_macrostate_transitions(
                nstates,
                weights,
                label_assignments,
                state_assignments,
                dt,
                state,
                cond_fluxes,
                cond_counts,
                total_fluxes,
                total_counts,
                durations,
            )
            # Carry the tracing state forward so the next iteration can continue from it.
            last_state = state

            # Store trace-based kinetics data
            cond_fluxes_ds[iiter] = cond_fluxes
            total_fluxes_ds[iiter] = total_fluxes
            arrival_counts_ds[iiter] = total_counts
            cond_arrival_counts_ds[iiter] = cond_counts

            durations_count_ds[iiter] = len(durations)
            if len(durations) > 0:
                # Widen the dataset only when this iteration has more events
                # than any iteration seen so far.
                durations_ds.resize((iter_count, max(len(durations), durations_ds.shape[1])))
                durations_ds[iiter, : len(durations)] = durations

            # Do a little manual clean-up to prevent memory explosion
            del iter_group, weights, parent_ids, label_assignments, state, cond_fluxes, total_fluxes
            pi.progress += 1
@@ -0,0 +1,156 @@
1
+ """
2
+ Routines for implementing Lettieri et al.'s macrostate-to-macrostate rate calculations
3
+ using extrapolation to steady-state populations from average rate matrices
4
+
5
+ Internally, "labeled" objects (bin populations labeled by history, rate matrix elements labeled
6
+ by history) are stored as nested arrays -- e.g. rates[initial_label, final_label, initial_bin, final_bin].
7
+ These are converted to the flat forms required for, say, eigenvalue calculations internally, and the
8
+ results converted back. This is done because these conversions are not expensive, and it saves users of
9
+ this code from having to know how the flattened indexing works (something I screwed up all too
10
+ easily during development) -- mcz
11
+ """
12
+
13
+ import logging
14
+ import warnings
15
+
16
+ import numpy as np
17
+ import scipy.linalg
18
+
19
+ from westpa.core.data_manager import weight_dtype
20
+
21
+ from ._kinetics import (
22
+ calculate_labeled_fluxes, # @UnresolvedImport
23
+ calculate_labeled_fluxes_alllags, # @UnresolvedImport
24
+ labeled_flux_to_rate, # @UnresolvedImport
25
+ nested_to_flat_matrix,
26
+ nested_to_flat_vector, # @UnresolvedImport
27
+ flat_to_nested_vector,
28
+ _reduce_labeled_rate_matrix_to_macro,
29
+ ) # @UnresolvedImport
30
+
31
+
32
# Module-level logger, named after this module's import path (``__name__``).
log = logging.getLogger(__name__)
33
+
34
+
35
class ConsistencyWarning(UserWarning):
    '''Warning category used by this module when a rate matrix is not
    self-consistent (for example, a sink microstate or the absence of a
    well-defined steady state).'''
37
+
38
+
39
def get_steady_state(rates):
    '''Get the steady-state solution for a rate matrix.

    ``rates`` is a square (flattened) rate matrix; it is copied, so the
    caller's array is left untouched.  Each row of the copy is normalized to
    sum to one (rows whose sum is not positive are zeroed), and the
    eigenvector of the transposed matrix belonging to the eigenvalue of
    largest magnitude is taken as the steady state.

    As an optimization, returns the flattened labeled population vector
    (of length nstates*nbins), normalized to sum to one; to convert it to the
    nested vector used for storage, use ``flat_to_nested_vector()``.

    Returns ``None`` if the eigendecomposition fails.  Emits a
    ``ConsistencyWarning`` for every row that has no outgoing rate but does
    have incoming rate (a sink microstate).
    '''

    rates = rates.copy()

    # Convert to a transition probability matrix
    for i in range(rates.shape[0]):
        rowsum = rates[i, :].sum()
        if rowsum > 0:
            rates[i, :] = rates[i, :] / rowsum
        else:
            # Nothing leaves state i; if something enters it, it is a sink.
            if rates[:, i].sum() != 0:
                warnings.warn('sink microstate in rate matrix', ConsistencyWarning)
            rates[i, :] = 0

    try:
        vals, vecs = scipy.linalg.eig(rates.T)
    except Exception:
        # Deliberately best-effort: callers treat ``None`` as "no steady state".
        log.debug('exception obtaining eigenvectors', exc_info=True)
        return None

    vals = np.abs(vals)
    # Sorting and formatting the spectrum is only worth doing if it will be emitted.
    if log.isEnabledFor(logging.DEBUG):
        log.debug('eigenvalues: %r', sorted(vals, reverse=True))
    asort = np.argsort(vals)
    vec = vecs[:, asort[-1]]
    # The eigenvector may be complex and has arbitrary sign/phase; use magnitudes.
    ss = np.abs(vec)

    ss /= ss.sum()
    return ss
70
+
71
+
72
def get_macrostate_rates(labeled_rates, labeled_pops, extrapolate=True):
    '''Compute steady-state populations and state-to-state rates from a labeled
    rate matrix and labeled bin populations.

    When ``extrapolate`` is true, the populations are taken from
    ``get_steady_state()``; if that yields ``None`` a ``ConsistencyWarning``
    is issued and the supplied ``labeled_pops`` are used instead.  When
    ``extrapolate`` is false, ``labeled_pops`` are used directly.

    Returns ``(ss, macro_rates)``, where ``ss`` is the (nested) steady-state
    probability distribution and ``macro_rates`` is the state-to-state rate
    matrix.'''

    nstates, nbins = labeled_pops.shape
    flat_rates = nested_to_flat_matrix(labeled_rates)

    # Try the extrapolated steady state first, if requested.
    ss = get_steady_state(flat_rates) if extrapolate else None

    if ss is None:
        if extrapolate:
            warnings.warn('no well-defined steady state; using average populations', ConsistencyWarning)
        # Use the supplied average populations.
        ss = nested_to_flat_vector(labeled_pops)

    macro_rates = _reduce_labeled_rate_matrix_to_macro(nstates, nbins, flat_rates, ss)
    nested_ss = flat_to_nested_vector(nstates, nbins, ss)
    return nested_ss, macro_rates
95
+
96
+
97
def estimate_rates(
    nbins,
    state_labels,
    weights,
    parent_ids,
    bin_assignments,
    label_assignments,
    state_map,
    labeled_pops,
    all_lags=False,
    labeled_fluxes=None,
    labeled_rates=None,
    unlabeled_rates=None,
):
    '''Estimate fluxes and rates over multiple iterations.

    The number of iterations is set by how many vectors of weights, parent
    IDs, bin assignments, and label assignments are passed; the four
    sequences must have equal length.

    If ``all_lags`` is true, the average is over all possible lags within the
    length-N window given; otherwise only the length-N lag is used.

    ``labeled_fluxes``, ``labeled_rates`` and ``unlabeled_rates`` may be
    supplied as preallocated output arrays; each one given is zeroed and
    filled in place, and each one omitted is allocated here.

    Note that the ``nbins`` argument is overwritten with
    ``labeled_pops.shape[1] - 1`` and that ``state_map`` is not used by this
    function.

    Returns labeled flux matrix, labeled rate matrix, and unlabeled rate matrix.'''

    assert len(weights) == len(parent_ids) == len(bin_assignments) == len(label_assignments)
    nstates = len(state_labels)
    nbins = labeled_pops.shape[1] - 1

    # Prepare output arrays: reuse caller-provided buffers, otherwise allocate.
    if labeled_fluxes is not None:
        labeled_fluxes.fill(0.0)
    else:
        labeled_fluxes = np.zeros((nstates, nstates, nbins, nbins), weight_dtype)

    if labeled_rates is not None:
        labeled_rates.fill(0.0)
    else:
        labeled_rates = np.zeros_like(labeled_fluxes)

    if unlabeled_rates is not None:
        unlabeled_rates.fill(0.0)
    else:
        unlabeled_rates = np.zeros((nbins, nbins), weight_dtype)

    # Accumulate the flux matrix over the window.
    # flux matrix is [initial_label][final_label][initial_bin][final_bin]
    accumulate_fluxes = calculate_labeled_fluxes_alllags if all_lags else calculate_labeled_fluxes
    twindow = accumulate_fluxes(nstates, weights, parent_ids, bin_assignments, label_assignments, labeled_fluxes)
    labeled_fluxes /= twindow

    # Calculate rate matrix for this window, using populations from the last iteration (which correspond
    # to the weights that contribute to the flux matrix)
    labeled_flux_to_rate(labeled_fluxes, labeled_pops, labeled_rates)

    # Calculate an unlabeled rate matrix by summing out the label axes and
    # reusing the labeled routine with singleton label dimensions.
    summed_fluxes = labeled_fluxes.sum(axis=(0, 1))
    summed_pops = labeled_pops.sum(axis=0)
    unlabeled_rates[...] = labeled_flux_to_rate(summed_fluxes[None, None, :, :], summed_pops[None, :])[0, 0]

    return labeled_fluxes, labeled_rates, unlabeled_rates
@@ -0,0 +1,266 @@
1
+ from collections import namedtuple
2
+ from itertools import zip_longest
3
+
4
+ import numpy as np
5
+
6
+ import westpa
7
+ from westpa.core.kinetics._kinetics import (
8
+ flux_assign,
9
+ pop_assign,
10
+ calc_rates,
11
+ StreamingStats1D,
12
+ StreamingStats2D,
13
+ ) # @UnresolvedImport
14
+
15
+
16
# Named tuple proxy for the StreamingStats classes: it carries only the
# ``M1``, ``M2`` and ``n`` attributes, which is everything tuple2stats() needs
# to rebuild a StreamingStats1D/2D object.
StreamingStatsTuple = namedtuple('StreamingStatsTuple', ['M1', 'M2', 'n'])
18
+
19
+
20
def grouper(n, iterable, fillvalue=None):
    '''Collect data into fixed-length chunks or blocks.

    Yields tuples of length ``n``; the final tuple is padded with
    ``fillvalue`` when the input runs out.

    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
    '''
    # Handing the *same* iterator to zip_longest n times makes each output
    # tuple consume n consecutive items.
    shared = iter(iterable)
    return zip_longest(*(shared for _ in range(n)), fillvalue=fillvalue)
25
+
26
+
27
def tuple2stats(stat_tuple):
    '''Rebuild a StreamingStats object from its ``(M1, M2, n)`` proxy.

    ``stat_tuple`` must expose ``M1``, ``M2`` and ``n`` attributes (for
    example a ``StreamingStatsTuple``).  A 2-dimensional ``M1`` produces a
    ``StreamingStats2D`` and a 1-dimensional ``M1`` a ``StreamingStats1D``;
    the three attributes are then copied onto the new object.

    Raises ``ValueError`` if ``M1`` is neither 1- nor 2-dimensional.
    '''
    ndims = stat_tuple.M1.ndim

    if ndims == 2:
        stats = StreamingStats2D(stat_tuple.M1.shape)
    elif ndims == 1:
        stats = StreamingStats1D(stat_tuple.M1.shape[0])
    else:
        # Explicit check (rather than an assert) so it survives ``python -O``.
        raise ValueError('expected 1- or 2-dimensional statistics, got {} dimension(s)'.format(ndims))

    stats.M1 = stat_tuple.M1
    stats.M2 = stat_tuple.M2
    stats.n = stat_tuple.n

    return stats
43
+
44
+
45
def process_iter_chunk(bin_mapper, iter_indices, iter_data=None):
    '''Accumulate flux, rate and population statistics over a set of iterations.

    ``iter_indices`` lists the iterations to process.  If ``iter_data`` is
    given (and non-empty) it must be a mapping keyed by iteration group name
    holding the arrays needed for the calculation; otherwise each iteration
    group is fetched from the data manager obtained through ``westpa.rc``.

    Returns a 3-tuple of ``StreamingStatsTuple`` objects for the fluxes,
    rates and populations respectively.
    '''

    data_manager = westpa.rc.get_data_manager()
    system = westpa.rc.get_system_driver()

    nbins = bin_mapper.nbins
    matrix_shape = (nbins, nbins)

    # Running statistics, updated once per iteration.
    flux_stats = StreamingStats2D(matrix_shape)
    rate_stats = StreamingStats2D(matrix_shape)
    pop_stats = StreamingStats1D(nbins)

    # All-zero masks for the flux and population updates; ``rate_mask`` is
    # handed to calc_rates() and then used for the rate update.
    nomask1d = np.zeros((nbins,), np.uint8)
    nomask2d = np.zeros(matrix_shape, np.uint8)
    rate_mask = np.zeros(matrix_shape, np.uint8)

    # Scratch buffers reused across iterations.
    flux_matrix = np.zeros(matrix_shape, np.float64)
    rate_matrix = np.zeros(matrix_shape, np.float64)
    population_vector = np.zeros((nbins,), np.float64)

    pcoord_len = system.pcoord_len
    assign = bin_mapper.assign

    for n_iter in iter_indices:
        flux_matrix.fill(0.0)
        population_vector.fill(0.0)

        preloaded = bool(iter_data)

        if preloaded:
            group_name = 'iter_{:0{prec}d}'.format(int(n_iter), prec=data_manager.iter_prec)
            iter_group = iter_data[group_name]
        else:
            iter_group = data_manager.get_iter_group(n_iter)

        # first, account for the flux due to recycling
        # we access the hdf5 file directly to avoid nearly 50% overhead of creating a ton of
        # tiny newweightentry objects
        try:
            nwgroup = iter_group['new_weights']
        except KeyError:
            # no new weight data
            pass
        else:
            if preloaded:
                recycled_weights = nwgroup['weight']
                prev_init_pcoords = nwgroup['prev_init_pcoord']
                new_init_pcoords = nwgroup['new_init_pcoord']
            else:
                recycled_weights = nwgroup['index'][...]['weight']
                prev_init_pcoords = nwgroup['prev_init_pcoord'][...]
                new_init_pcoords = nwgroup['new_init_pcoord'][...]

            prev_init_assignments = assign(prev_init_pcoords)
            new_init_assignments = assign(new_init_pcoords)
            flux_assign(recycled_weights, prev_init_assignments, new_init_assignments, flux_matrix)

            # Drop the references promptly to keep memory use down.
            del recycled_weights, prev_init_pcoords, new_init_pcoords
            del prev_init_assignments, new_init_assignments

        # Then the flux and populations of the segments themselves.
        if preloaded:
            weights = iter_group['weight']
            initial_pcoords = iter_group['initial_pcoords']
            final_pcoords = iter_group['final_pcoords']
        else:
            weights = iter_group['seg_index']['weight']
            initial_pcoords = iter_group['pcoord'][:, 0]
            final_pcoords = iter_group['pcoord'][:, pcoord_len - 1]

        initial_assignments = assign(initial_pcoords)
        final_assignments = assign(final_pcoords)

        flux_assign(weights, initial_assignments, final_assignments, flux_matrix)
        pop_assign(weights, initial_assignments, population_vector)

        flux_stats.update(flux_matrix, nomask2d)
        pop_stats.update(population_vector, nomask1d)

        calc_rates(flux_matrix, population_vector, rate_matrix, rate_mask)
        rate_stats.update(rate_matrix, rate_mask)

        del weights, initial_assignments, final_assignments
        del initial_pcoords, final_pcoords, iter_group

    # Create namedtuple proxies for the cython StreamingStats objects
    # since the typed memoryviews class variables do not seem to return
    # cleanly from the zmq workers
    return tuple(StreamingStatsTuple(s.M1, s.M2, s.n) for s in (flux_stats, rate_stats, pop_stats))
146
+
147
+
148
class RateAverager:
    '''Calculate bin-to-bin kinetic properties (fluxes, rates, populations) at
    1-tau resolution.

    After :meth:`calculate` has run, the results are available as
    ``average_flux`` / ``stderr_flux``, ``average_populations`` /
    ``stderr_populations`` and ``average_rate`` / ``stderr_rate``.
    '''

    def __init__(self, bin_mapper, system=None, data_manager=None, work_manager=None):
        '''Store ``bin_mapper``; any collaborator not supplied (or falsy) is
        looked up through ``westpa.rc``.'''
        self.bin_mapper = bin_mapper
        self.data_manager = data_manager or westpa.rc.get_data_manager()
        self.system = system or westpa.rc.get_system_driver()
        self.work_manager = work_manager or westpa.rc.get_work_manager()

    def extract_data(self, iter_indices):
        '''Extract data from the data manager and place it in a dict mirroring
        the same underlying layout.

        The result maps each iteration group name to a dict with keys
        ``'weight'``, ``'initial_pcoords'`` and ``'final_pcoords'`` and, when
        the iteration has a ``new_weights`` group, a ``'new_weights'`` dict
        with keys ``'weight'``, ``'prev_init_pcoord'`` and ``'new_init_pcoord'``.
        '''

        last_point = self.system.pcoord_len - 1
        extracted = {}

        for n_iter in iter_indices:
            group_name = 'iter_{:0{prec}d}'.format(int(n_iter), prec=self.data_manager.iter_prec)
            iter_group = self.data_manager.get_iter_group(n_iter)
            entry = extracted[group_name] = {}

            try:
                nwgroup = iter_group['new_weights']
            except KeyError:
                # no new weight data
                pass
            else:
                entry['new_weights'] = {
                    'weight': nwgroup['index'][...]['weight'],
                    'prev_init_pcoord': nwgroup['prev_init_pcoord'][...],
                    'new_init_pcoord': nwgroup['new_init_pcoord'][...],
                }

            entry['weight'] = iter_group['seg_index']['weight']
            entry['initial_pcoords'] = iter_group['pcoord'][:, 0]
            entry['final_pcoords'] = iter_group['pcoord'][:, last_point]

        return extracted

    def task_generator(self, iter_start, iter_stop, block_size):
        '''Yield ``(callable, args, kwargs)`` tasks, one per block of at most
        ``block_size`` iterations in ``[iter_start, iter_stop)``.  Each task
        carries the data extracted for its block as the ``iter_data`` keyword.'''
        for padded_block in grouper(block_size, range(iter_start, iter_stop)):
            # grouper() pads the final block with None; strip the padding.
            block = [n_iter for n_iter in padded_block if n_iter is not None]
            yield (process_iter_chunk, (self.bin_mapper, block), {'iter_data': self.extract_data(block)})

    def calculate(self, iter_start=None, iter_stop=None, n_blocks=1, queue_size=1):
        '''Read the HDF5 file and collect flux matrices and population vectors
        for each bin for each iteration in the range [iter_start, iter_stop).
        ``iter_start`` defaults to 1 and ``iter_stop`` to the data manager's
        current iteration.  Break the calculation into n_blocks blocks.  If
        the calculation is broken up into more than one block, queue_size
        specifies the maximum number of tasks in the work queue.'''

        iter_start = iter_start or 1
        iter_stop = iter_stop or self.data_manager.current_iteration

        itercount = iter_stop - iter_start
        block_size = max(1, itercount // n_blocks)
        nbins = self.bin_mapper.nbins

        if n_blocks == 1:
            # Single block: process everything in this process, letting
            # process_iter_chunk() fetch the data itself.
            chunk_tuples = process_iter_chunk(self.bin_mapper, list(range(iter_start, iter_stop)))
            flux_stats, rate_stats, population_stats = map(tuple2stats, chunk_tuples)
        else:
            flux_stats = StreamingStats2D((nbins, nbins))
            rate_stats = StreamingStats2D((nbins, nbins))
            population_stats = StreamingStats1D(nbins)

            tasks = self.task_generator(iter_start, iter_stop, block_size)

            for future in self.work_manager.submit_as_completed(tasks, queue_size):
                chunk_flux_stats, chunk_rate_stats, chunk_pop_stats = map(tuple2stats, future.get_result())

                # Update statistics with chunked subsets
                flux_stats += chunk_flux_stats
                rate_stats += chunk_rate_stats
                population_stats += chunk_pop_stats

        def stderr_of(stats):
            # NOTE(review): this divides sqrt(var) by n rather than by sqrt(n);
            # confirm this is the intended definition of the standard error.
            return np.nan_to_num(np.sqrt(stats.var) / stats.n)

        self.average_flux = flux_stats.mean
        self.stderr_flux = stderr_of(flux_stats)

        self.average_populations = population_stats.mean
        self.stderr_populations = stderr_of(population_stats)

        self.average_rate = rate_stats.mean
        self.stderr_rate = stderr_of(rate_stats)

        assert not np.any(np.isinf(self.stderr_flux))
        assert not np.any(np.isinf(self.stderr_rate))
        assert not np.any(np.isinf(self.stderr_populations))
245
+
246
+
247
if __name__ == '__main__':
    # Tests this file on the west.h5 data in the current (sim root) directory
    westpa.rc.read_config()
    system = westpa.rc.get_system_driver()
    data_manager = westpa.rc.get_data_manager()
    data_manager.open_backing('r')
    averager = RateAverager(system.bin_mapper)
    averager.calculate()

    # Print each quantity as: heading, mean, standard error.
    for heading, mean, stderr in (
        ('Population mean and standard error', averager.average_populations, averager.stderr_populations),
        ('Flux matrix, mean and standard error', averager.average_flux, averager.stderr_flux),
        ('Rate matrix, mean and standard error', averager.average_rate, averager.stderr_rate),
    ):
        print(heading)
        print(mean)
        print(stderr)