westpa 2022.10__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of westpa might be problematic. Click here for more details.
- westpa/__init__.py +14 -0
- westpa/_version.py +21 -0
- westpa/analysis/__init__.py +5 -0
- westpa/analysis/core.py +746 -0
- westpa/analysis/statistics.py +27 -0
- westpa/analysis/trajectories.py +360 -0
- westpa/cli/__init__.py +0 -0
- westpa/cli/core/__init__.py +0 -0
- westpa/cli/core/w_fork.py +152 -0
- westpa/cli/core/w_init.py +230 -0
- westpa/cli/core/w_run.py +77 -0
- westpa/cli/core/w_states.py +212 -0
- westpa/cli/core/w_succ.py +99 -0
- westpa/cli/core/w_truncate.py +59 -0
- westpa/cli/tools/__init__.py +0 -0
- westpa/cli/tools/ploterr.py +506 -0
- westpa/cli/tools/plothist.py +706 -0
- westpa/cli/tools/w_assign.py +596 -0
- westpa/cli/tools/w_bins.py +166 -0
- westpa/cli/tools/w_crawl.py +119 -0
- westpa/cli/tools/w_direct.py +547 -0
- westpa/cli/tools/w_dumpsegs.py +94 -0
- westpa/cli/tools/w_eddist.py +506 -0
- westpa/cli/tools/w_fluxanl.py +378 -0
- westpa/cli/tools/w_ipa.py +833 -0
- westpa/cli/tools/w_kinavg.py +127 -0
- westpa/cli/tools/w_kinetics.py +96 -0
- westpa/cli/tools/w_multi_west.py +414 -0
- westpa/cli/tools/w_ntop.py +213 -0
- westpa/cli/tools/w_pdist.py +515 -0
- westpa/cli/tools/w_postanalysis_matrix.py +82 -0
- westpa/cli/tools/w_postanalysis_reweight.py +53 -0
- westpa/cli/tools/w_red.py +486 -0
- westpa/cli/tools/w_reweight.py +780 -0
- westpa/cli/tools/w_select.py +226 -0
- westpa/cli/tools/w_stateprobs.py +111 -0
- westpa/cli/tools/w_trace.py +599 -0
- westpa/core/__init__.py +0 -0
- westpa/core/_rc.py +673 -0
- westpa/core/binning/__init__.py +55 -0
- westpa/core/binning/_assign.cpython-312-darwin.so +0 -0
- westpa/core/binning/assign.py +449 -0
- westpa/core/binning/binless.py +96 -0
- westpa/core/binning/binless_driver.py +54 -0
- westpa/core/binning/binless_manager.py +190 -0
- westpa/core/binning/bins.py +47 -0
- westpa/core/binning/mab.py +427 -0
- westpa/core/binning/mab_driver.py +54 -0
- westpa/core/binning/mab_manager.py +198 -0
- westpa/core/data_manager.py +1694 -0
- westpa/core/extloader.py +74 -0
- westpa/core/h5io.py +995 -0
- westpa/core/kinetics/__init__.py +24 -0
- westpa/core/kinetics/_kinetics.cpython-312-darwin.so +0 -0
- westpa/core/kinetics/events.py +147 -0
- westpa/core/kinetics/matrates.py +156 -0
- westpa/core/kinetics/rate_averaging.py +266 -0
- westpa/core/progress.py +218 -0
- westpa/core/propagators/__init__.py +54 -0
- westpa/core/propagators/executable.py +715 -0
- westpa/core/reweight/__init__.py +14 -0
- westpa/core/reweight/_reweight.cpython-312-darwin.so +0 -0
- westpa/core/reweight/matrix.py +126 -0
- westpa/core/segment.py +119 -0
- westpa/core/sim_manager.py +830 -0
- westpa/core/states.py +359 -0
- westpa/core/systems.py +93 -0
- westpa/core/textio.py +74 -0
- westpa/core/trajectory.py +330 -0
- westpa/core/we_driver.py +908 -0
- westpa/core/wm_ops.py +43 -0
- westpa/core/yamlcfg.py +391 -0
- westpa/fasthist/__init__.py +34 -0
- westpa/fasthist/__main__.py +110 -0
- westpa/fasthist/_fasthist.cpython-312-darwin.so +0 -0
- westpa/mclib/__init__.py +264 -0
- westpa/mclib/__main__.py +28 -0
- westpa/mclib/_mclib.cpython-312-darwin.so +0 -0
- westpa/oldtools/__init__.py +4 -0
- westpa/oldtools/aframe/__init__.py +35 -0
- westpa/oldtools/aframe/atool.py +75 -0
- westpa/oldtools/aframe/base_mixin.py +26 -0
- westpa/oldtools/aframe/binning.py +178 -0
- westpa/oldtools/aframe/data_reader.py +560 -0
- westpa/oldtools/aframe/iter_range.py +200 -0
- westpa/oldtools/aframe/kinetics.py +117 -0
- westpa/oldtools/aframe/mcbs.py +146 -0
- westpa/oldtools/aframe/output.py +39 -0
- westpa/oldtools/aframe/plotting.py +90 -0
- westpa/oldtools/aframe/trajwalker.py +126 -0
- westpa/oldtools/aframe/transitions.py +469 -0
- westpa/oldtools/cmds/__init__.py +0 -0
- westpa/oldtools/cmds/w_ttimes.py +358 -0
- westpa/oldtools/files.py +34 -0
- westpa/oldtools/miscfn.py +23 -0
- westpa/oldtools/stats/__init__.py +4 -0
- westpa/oldtools/stats/accumulator.py +35 -0
- westpa/oldtools/stats/edfs.py +129 -0
- westpa/oldtools/stats/mcbs.py +89 -0
- westpa/tools/__init__.py +33 -0
- westpa/tools/binning.py +472 -0
- westpa/tools/core.py +340 -0
- westpa/tools/data_reader.py +159 -0
- westpa/tools/dtypes.py +31 -0
- westpa/tools/iter_range.py +198 -0
- westpa/tools/kinetics_tool.py +340 -0
- westpa/tools/plot.py +283 -0
- westpa/tools/progress.py +17 -0
- westpa/tools/selected_segs.py +154 -0
- westpa/tools/wipi.py +751 -0
- westpa/trajtree/__init__.py +4 -0
- westpa/trajtree/_trajtree.cpython-312-darwin.so +0 -0
- westpa/trajtree/trajtree.py +117 -0
- westpa/westext/__init__.py +0 -0
- westpa/westext/adaptvoronoi/__init__.py +3 -0
- westpa/westext/adaptvoronoi/adaptVor_driver.py +214 -0
- westpa/westext/hamsm_restarting/__init__.py +3 -0
- westpa/westext/hamsm_restarting/example_overrides.py +35 -0
- westpa/westext/hamsm_restarting/restart_driver.py +1165 -0
- westpa/westext/stringmethod/__init__.py +11 -0
- westpa/westext/stringmethod/fourier_fitting.py +69 -0
- westpa/westext/stringmethod/string_driver.py +253 -0
- westpa/westext/stringmethod/string_method.py +306 -0
- westpa/westext/weed/BinCluster.py +180 -0
- westpa/westext/weed/ProbAdjustEquil.py +100 -0
- westpa/westext/weed/UncertMath.py +247 -0
- westpa/westext/weed/__init__.py +10 -0
- westpa/westext/weed/weed_driver.py +182 -0
- westpa/westext/wess/ProbAdjust.py +101 -0
- westpa/westext/wess/__init__.py +6 -0
- westpa/westext/wess/wess_driver.py +207 -0
- westpa/work_managers/__init__.py +57 -0
- westpa/work_managers/core.py +396 -0
- westpa/work_managers/environment.py +134 -0
- westpa/work_managers/mpi.py +318 -0
- westpa/work_managers/processes.py +187 -0
- westpa/work_managers/serial.py +28 -0
- westpa/work_managers/threads.py +79 -0
- westpa/work_managers/zeromq/__init__.py +20 -0
- westpa/work_managers/zeromq/core.py +641 -0
- westpa/work_managers/zeromq/node.py +131 -0
- westpa/work_managers/zeromq/work_manager.py +526 -0
- westpa/work_managers/zeromq/worker.py +320 -0
- westpa-2022.10.dist-info/AUTHORS +22 -0
- westpa-2022.10.dist-info/LICENSE +21 -0
- westpa-2022.10.dist-info/METADATA +183 -0
- westpa-2022.10.dist-info/RECORD +150 -0
- westpa-2022.10.dist-info/WHEEL +5 -0
- westpa-2022.10.dist-info/entry_points.txt +29 -0
- westpa-2022.10.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
import h5py
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from westpa.tools import WESTParallelTool, ProgressIndicatorComponent
|
|
7
|
+
from westpa.fasthist import histnd, normhistnd
|
|
8
|
+
from westpa.core import h5io
|
|
9
|
+
|
|
10
|
+
# Module-level logger for this tool, registered under the name 'w_eddist'.
log = logging.getLogger('w_eddist')
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class DurationDataset:
    '''A facade for the 'dsspec' dataclass that incorporates the mask into get_iter_data method.

    ``dataset`` is indexed by absolute iteration (row ``n_iter - 1``), whereas
    ``mask`` is indexed relative to ``iter_start`` (row ``n_iter - iter_start``).
    '''

    def __init__(self, dataset, mask, iter_start=1):
        '''Store the data source, the selection mask and the first iteration the mask covers.

        ``dataset`` must expose a ``dtype`` attribute; it is re-exported as ``self.dtype``.
        '''
        self.dataset = dataset
        self.mask = mask
        self.dtype = dataset.dtype
        self.iter_start = iter_start

    def get_iter_data(self, n_iter):
        '''Return the masked entries of iteration ``n_iter`` reshaped to ``(nsegs, 1, 1)``.

        Returns ``None`` when the mask selects nothing for that iteration.

        Raises ``ValueError`` when ``n_iter`` precedes ``iter_start`` or when
        indexing the dataset or the mask raises ``IndexError``.
        '''
        out_of_range = "Iteration {} is not within the iteration range".format(n_iter)

        # An explicit comparison (rather than ``assert``) keeps this check alive
        # under ``python -O``; without it a too-small n_iter would index the mask
        # with a negative offset and silently pick up the wrong row.
        if n_iter < self.iter_start:
            raise ValueError(out_of_range)

        try:
            dset = self.dataset[n_iter - 1][self.mask[n_iter - self.iter_start]]
        except IndexError as err:
            raise ValueError(out_of_range) from err

        nsegs = dset.shape[0]
        if nsegs == 0:
            return None
        return dset.reshape(nsegs, 1, 1)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def isiterable(x):
    '''Return True when ``iter(x)`` succeeds and False when it raises ``TypeError``.'''
    try:
        iter(x)
        return True
    except TypeError:
        return False
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _remote_min_max(ndim, dset_dtype, n_iter, dsspec):
    '''Return per-dimension ``(min, max)`` pairs of the data for iteration ``n_iter``.

    The data is obtained from ``dsspec.get_iter_data(n_iter)`` and indexed as
    ``data[:, :, idim]``. When that call returns ``None`` the result holds the
    initial sentinel pairs ``(largest, smallest)`` representable value of
    ``dset_dtype``, which leave any later min/max reduction unchanged.
    '''
    # np.finfo describes floating-point dtypes and raises ValueError for anything
    # else, in which case the integer limits from np.iinfo are used instead.
    try:
        limits = np.finfo(dset_dtype)
    except ValueError:
        limits = np.iinfo(dset_dtype)

    extrema = [(limits.max, limits.min) for _ in range(ndim)]

    values = dsspec.get_iter_data(n_iter)
    if values is not None:
        for axis in range(ndim):
            column = values[:, :, axis]
            low, high = extrema[axis]
            extrema[axis] = (min(low, column.min()), max(high, column.max()))
            del column
        del values

    return extrema
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _remote_bin_iter(iiter, n_iter, dsspec, wt_dsspec, initpoint, binbounds, ignore_out_of_range):
    '''Histogram the data of iteration ``n_iter`` and return ``(iiter, n_iter, histogram)``.

    The histogram has one axis per entry of ``binbounds`` (``len(bounds) - 1``
    bins each) and dtype float64. When ``dsspec`` has no data for the iteration
    the all-zero histogram is returned as is, without normalization.
    '''
    hist_shape = tuple(len(edges) - 1 for edges in binbounds)
    hist = np.zeros(hist_shape, dtype=np.float64)

    values = dsspec.get_iter_data(n_iter)
    if values is None:
        return iiter, n_iter, hist

    n_points = values.shape[1]
    seg_weights = wt_dsspec.get_iter_data(n_iter)[:, 0, 0]

    # NOTE(review): a non-zero ``initpoint`` shortens this loop from the end (the
    # slice ``values[:, initpoint:, :]`` that would skip leading points is not
    # applied); the caller visible in this file always passes 0 -- confirm intent.
    for point in range(n_points - initpoint):
        histnd(
            values[:, point, :],
            binbounds,
            seg_weights,
            out=hist,
            binbound_check=False,
            ignore_out_of_range=ignore_out_of_range,
        )

    del seg_weights, values

    # normalize histogram
    normhistnd(hist, binbounds)
    return iiter, n_iter, hist
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Command-line tool class for ``w_eddist``. ``prog`` and ``description`` are
# class attributes holding the program name and the long help text; the text
# below states the same default output file name ("eddist.h5") that the
# -o/--output option registers.
class WEDDist(WESTParallelTool):
    prog = 'w_eddist'
    description = '''\
Calculate time-resolved transition-event duration distribution from kinetics results


-----------------------------------------------------------------------------
Source data
-----------------------------------------------------------------------------

Source data is collected from the results of 'w_kinetics trace' (see w_kinetics trace --help for
more information on generating this dataset).


-----------------------------------------------------------------------------
Histogram binning
-----------------------------------------------------------------------------

By default, histograms are constructed with 100 bins in each dimension. This
can be overridden by specifying -b/--bins, which accepts a number of different
kinds of arguments:

a single integer N
N uniformly spaced bins will be used in each dimension.

a sequence of integers N1,N2,... (comma-separated)
N1 uniformly spaced bins will be used for the first dimension, N2 for the
second, and so on.

a list of lists [[B11, B12, B13, ...], [B21, B22, B23, ...], ...]
The bin boundaries B11, B12, B13, ... will be used for the first dimension,
B21, B22, B23, ... for the second dimension, and so on. These bin
boundaries need not be uniformly spaced. These expressions will be
evaluated with Python's ``eval`` construct, with ``np`` available for
use [e.g. to specify bins using np.arange()].

The first two forms (integer, list of integers) will trigger a scan of all
data in each dimension in order to determine the minimum and maximum values,
which may be very expensive for large datasets. This can be avoided by
explicitly providing bin boundaries using the list-of-lists form.

Note that these bins are *NOT* at all related to the bins used to drive WE
sampling.


-----------------------------------------------------------------------------
Output format
-----------------------------------------------------------------------------

The output file produced (specified by -o/--output, defaulting to "eddist.h5")
may be fed to plothist to generate plots (or appropriately processed text or
HDF5 files) from this data. In short, the following datasets are created:

``histograms``
Normalized histograms. The first axis corresponds to iteration, and
remaining axes correspond to dimensions of the input dataset.

``/binbounds_0``
Vector of bin boundaries for the first (index 0) dimension. Additional
datasets similarly named (/binbounds_1, /binbounds_2, ...) are created
for additional dimensions.

``/midpoints_0``
Vector of bin midpoints for the first (index 0) dimension. Additional
datasets similarly named are created for additional dimensions.

``n_iter``
Vector of iteration numbers corresponding to the stored histograms (i.e.
the first axis of the ``histograms`` dataset).


-----------------------------------------------------------------------------
Subsequent processing
-----------------------------------------------------------------------------

The output generated by this program (-o/--output, default "eddist.h5") may be
plotted by the ``plothist`` program. See ``plothist --help`` for more
information.


-----------------------------------------------------------------------------
Parallelization
-----------------------------------------------------------------------------

This tool supports parallelized binning, including reading of input data.
Parallel processing is the default. For simple cases (reading pre-computed
input data, modest numbers of segments), serial processing (--serial) may be
more efficient.


-----------------------------------------------------------------------------
Command-line options
-----------------------------------------------------------------------------

'''
|
|
187
|
+
|
|
188
|
+
def __init__(self):
    '''Set up the tool with the parallel work manager as default and all settings unset.'''
    super().__init__()

    # Choosing the parallel work manager explicitly is not strictly required,
    # but it makes the default visible.
    self.wm_env.default_work_manager = self.wm_env.default_parallel_work_manager

    # Components and settings shared by every stage of the tool
    self.progress = ProgressIndicatorComponent()
    self.default_kinetics_file = 'kintrace.h5'
    self.kinetics_filename = None
    self.kinetics_file = None  # handle opened in process_args()
    self.istate = self.fstate = None

    # Data sources for durations and their weights, created in go()
    self.duration_dsspec = self.wt_dsspec = None

    self.binspec = None
    self.output_filename = None
    self.output_file = None

    # State that only matters while histograms are being generated
    self.iter_start = self.iter_stop = None
    self.ndim = None
    self.dset_dtype = None
    self.binbounds = None  # one array of bin boundaries per dimension
    self.midpoints = None  # one array of bin midpoints per dimension
    self.data_range = None  # one (min, max) pair per dimension
    self.ignore_out_of_range = False
    self.compress_output = False
|
|
220
|
+
|
|
221
|
+
def add_args(self, parser):
    '''Register this tool's command-line options on ``parser``.

    Adds the binning, compression and out-of-range options, the required
    --istate/--fstate pair, an iteration-range group, an input/output group,
    and finally whatever ``self.progress.add_args`` contributes.
    '''
    bins_help = (
        'Use BINEXPR for bins. This may be an integer, which will be used for each\n'
        'dimension of the progress coordinate; a list of integers (formatted as [n1,n2,...])\n'
        'which will use n1 bins for the first dimension, n2 for the second dimension, and so on;\n'
        'or a list of lists of boundaries (formatted as [[a1, a2, ...], [b1, b2, ...], ... ]), which\n'
        'will use [a1, a2, ...] as bin boundaries for the first dimension, [b1, b2, ...] as bin boundaries\n'
        'for the second dimension, and so on. (Default: 100 bins in each dimension.)'
    )
    parser.add_argument('-b', '--bins', dest='bins', metavar='BINEXPR', default='100', help=bins_help)

    compress_help = (
        'Compress histograms. May make storage of higher-dimensional histograms\n'
        'more tractable, at the (possible extreme) expense of increased analysis time.\n'
        '(Default: no compression.)'
    )
    parser.add_argument('-C', '--compress', action='store_true', help=compress_help)

    loose_help = (
        'Ignore values that do not fall within bins. (Risky, as this can make buggy bin\n'
        'boundaries appear as reasonable data. Only use if you are\n'
        'sure of your bin boundary specification.)'
    )
    parser.add_argument('--loose', dest='ignore_out_of_range', action='store_true', help=loose_help)

    # Both end states of the transition are mandatory.
    parser.add_argument('--istate', type=int, required=True, dest='istate', help='Initial state defining transition event')
    parser.add_argument('--fstate', type=int, required=True, dest='fstate', help='Final state defining transition event')

    iter_opts = parser.add_argument_group('iteration range options')
    iter_opts.add_argument(
        '--first-iter',
        default=1,
        dest='iter_start',
        type=int,
        help='Iteration to begin analysis (default: 1)',
    )
    iter_opts.add_argument('--last-iter', dest='iter_stop', type=int, help='Iteration to end analysis')

    io_opts = parser.add_argument_group('input/output options')

    # self.default_kinetics_file supplies the default for -k/--kinetics.
    kinetics_help = (
        'Populations and transition rates (including evolution) are stored in KINETICS\n'
        '(default: %(default)s).'
    )
    io_opts.add_argument('-k', '--kinetics', default=self.default_kinetics_file, help=kinetics_help)
    io_opts.add_argument(
        '-o',
        '--output',
        dest='output',
        default='eddist.h5',
        help='Store results in OUTPUT (default: %(default)s).',
    )

    self.progress.add_args(parser)
|
|
281
|
+
|
|
282
|
+
def process_args(self, args):
    '''Copy parsed command-line values onto the tool and open the kinetics file read-only.

    ``--last-iter`` is converted to an exclusive bound (``args.iter_stop + 1``);
    when it is omitted, the ``iter_stop`` attribute of the kinetics file is used
    unchanged (presumably already exclusive -- confirm against the file writer).
    '''
    self.progress.process_args(args)

    # Transition definition and its data source
    self.istate, self.fstate = args.istate, args.fstate
    self.kinetics_filename = args.kinetics
    self.kinetics_file = h5io.WESTPAH5File(self.kinetics_filename, 'r')

    # Iteration window
    self.iter_start = args.iter_start
    self.iter_stop = self.kinetics_file.attrs['iter_stop'] if args.iter_stop is None else args.iter_stop + 1

    # Binning and output settings
    self.binspec = args.bins
    self.output_filename = args.output
    self.ignore_out_of_range = bool(args.ignore_out_of_range)
    self.compress_output = args.compress or False
|
|
299
|
+
|
|
300
|
+
def go(self):
    '''Run the analysis: select transition events, build bins and histograms, write the output file.

    Reads the ``durations`` dataset of the kinetics file for the configured
    iteration window, keeps only events going from ``self.istate`` to
    ``self.fstate``, and writes ``binbounds_N``/``midpoints_N``, ``histograms``
    and ``n_iter`` to a freshly created (truncated) output file.

    The output file is closed even when bin construction or histogramming
    raises, so a failed run does not leave the HDF5 handle open.
    '''
    pi = self.progress.indicator
    pi.operation = 'Initializing'
    with pi:
        durations = self.kinetics_file['durations']
        self.duration = durations[self.iter_start - 1 : self.iter_stop - 1]

        # Only select transition events from specified istate to fstate
        mask = (self.duration['istate'] == self.istate) & (self.duration['fstate'] == self.fstate)

        # The mask rows are relative to iter_start; DurationDataset applies that offset.
        self.duration_dsspec = DurationDataset(durations['duration'], mask, self.iter_start)
        self.wt_dsspec = DurationDataset(durations['weight'], mask, self.iter_start)

        self.output_file = h5py.File(self.output_filename, 'w')
        try:
            h5io.stamp_creator_data(self.output_file)

            # Construct bin boundaries
            self.construct_bins(self.parse_binspec(self.binspec))
            for idim, (binbounds, midpoints) in enumerate(zip(self.binbounds, self.midpoints)):
                self.output_file['binbounds_{}'.format(idim)] = binbounds
                self.output_file['midpoints_{}'.format(idim)] = midpoints

            # construct histogram
            self.construct_histogram()

            # Record iteration range, using the smallest integer dtype that can hold iter_stop
            iter_range = np.arange(self.iter_start, self.iter_stop, 1, dtype=(np.min_scalar_type(self.iter_stop)))
            self.output_file['n_iter'] = iter_range
            self.output_file['histograms'].attrs['iter_start'] = self.iter_start
            self.output_file['histograms'].attrs['iter_stop'] = self.iter_stop
        finally:
            self.output_file.close()
|
|
331
|
+
|
|
332
|
+
@staticmethod
|
|
333
|
+
def parse_binspec(binspec):
|
|
334
|
+
namespace = {'numpy': np, 'np': np, 'inf': float('inf')}
|
|
335
|
+
|
|
336
|
+
try:
|
|
337
|
+
binspec_compiled = eval(binspec, namespace)
|
|
338
|
+
except Exception as e:
|
|
339
|
+
raise ValueError('invalid bin specification: {!r}'.format(e))
|
|
340
|
+
else:
|
|
341
|
+
if log.isEnabledFor(logging.DEBUG):
|
|
342
|
+
log.debug('bin specs: {!r}'.format(binspec_compiled))
|
|
343
|
+
return binspec_compiled
|
|
344
|
+
|
|
345
|
+
def construct_bins(self, bins):
    '''
    Build bin boundaries from ``bins`` by dispatching on its form:

      * not iterable            -> one bin count shared by every dimension
      * iterable of scalars     -> one bin count per dimension
      * iterable of iterables   -> explicit boundaries per dimension

    The selected helper fills ``self.binbounds`` (a list of boundary arrays
    suitable for fasthist.histnd) and ``self.midpoints`` (the bin midpoints).
    '''
    if not isiterable(bins):
        builder = self._construct_bins_from_scalar
    elif not isiterable(bins[0]):
        builder = self._construct_bins_from_int_seq
    else:
        builder = self._construct_bins_from_bound_seqs
    builder(bins)

    if log.isEnabledFor(logging.DEBUG):
        log.debug('binbounds: {!r}'.format(self.binbounds))
|
|
366
|
+
|
|
367
|
+
def scan_data_shape(self):
    '''Set ``self.ndim`` to 1 and ``self.dset_dtype`` to the duration data's dtype, once.

    Does nothing when ``self.ndim`` has already been set.
    '''
    if self.ndim is not None:
        return

    source = self.duration_dsspec
    # The dimensionality is fixed at one rather than read from the data's shape.
    self.ndim = 1
    self.dset_dtype = source.dtype
|
|
374
|
+
|
|
375
|
+
def scan_data_range(self):
    '''Scan the duration data of every iteration for its range in each dimension.

    One ``_remote_min_max`` task per iteration in ``[iter_start, iter_stop)`` is
    handed to the work manager; the per-iteration results are folded into
    ``self.data_range``, a list of ``(min, max)`` pairs. The dimension count and
    dtype come from ``scan_data_shape()``.
    '''
    self.progress.indicator.new_operation('Scanning for data range', self.iter_stop - self.iter_start)
    self.scan_data_shape()

    ndim, dtype, source = self.ndim, self.dset_dtype, self.duration_dsspec

    # np.finfo raises ValueError for non-floating dtypes; fall back to np.iinfo.
    try:
        limits = np.finfo(dtype)
    except ValueError:
        limits = np.iinfo(dtype)

    # Start from (largest, smallest) so that the first real value replaces both ends.
    extrema = [(limits.max, limits.min) for _ in range(ndim)]
    self.data_range = extrema

    tasks = ((_remote_min_max, (ndim, dtype, n_iter, source), {}) for n_iter in range(self.iter_start, self.iter_stop))
    for future in self.work_manager.submit_as_completed(tasks, self.max_queue_len):
        per_iter = future.get_result(discard=True)
        for axis in range(ndim):
            low, high = extrema[axis]
            extrema[axis] = (min(low, per_iter[axis][0]), max(high, per_iter[axis][1]))
        self.progress.indicator.progress += 1
|
|
412
|
+
|
|
413
|
+
def _construct_bins_from_scalar(self, bins):
    '''Create ``bins`` uniformly spaced bins in every dimension, spanning the data range.

    Triggers ``scan_data_range()`` when ``self.data_range`` has not been
    determined yet. Fills ``self.binbounds`` and ``self.midpoints``.
    '''
    if self.data_range is None:
        self.scan_data_range()

    self.binbounds = []
    self.midpoints = []
    for axis in range(self.ndim):
        lower, upper = self.data_range[axis]
        # Stretch the upper bound by 1% so the maximum value lands inside the last bin.
        edges = np.linspace(lower, upper * 1.01, bins + 1)
        centers = (edges[:-1] + edges[1:]) / 2.0
        self.binbounds.append(edges)
        self.midpoints.append(centers)
|
|
433
|
+
|
|
434
|
+
def _construct_bins_from_int_seq(self, bins):
    '''Create ``bins[idim]`` uniformly spaced bins in dimension ``idim``, spanning the data range.

    Triggers ``scan_data_range()`` when ``self.data_range`` has not been
    determined yet. Fills ``self.binbounds`` and ``self.midpoints``.
    '''
    if self.data_range is None:
        self.scan_data_range()

    self.binbounds = []
    self.midpoints = []
    for axis in range(self.ndim):
        lower, upper = self.data_range[axis]
        # Stretch the upper bound by 1% so the maximum value lands inside the last bin.
        edges = np.linspace(lower, upper * 1.01, bins[axis] + 1)
        centers = (edges[:-1] + edges[1:]) / 2.0
        self.binbounds.append(edges)
        self.midpoints.append(centers)
|
|
450
|
+
|
|
451
|
+
def _construct_bins_from_bound_seqs(self, bins):
    '''Use the explicit boundary sequences in ``bins``, one per dimension.

    Each sequence is converted to an array and must be strictly increasing;
    otherwise ``ValueError`` is raised. Fills ``self.binbounds`` and
    ``self.midpoints``.
    '''
    self.binbounds = []
    self.midpoints = []
    for raw_edges in bins:
        edges = np.asarray(raw_edges)
        steps = np.diff(edges)
        if (steps <= 0).any():
            raise ValueError('boundary set {!r} is not strictly monotonically increasing'.format(edges))
        self.binbounds.append(edges)
        self.midpoints.append((edges[:-1] + edges[1:]) / 2.0)
|
|
460
|
+
|
|
461
|
+
def construct_histogram(self):
    '''Build one histogram per iteration from the bins set up by ``construct_bins()``.

    Creates the float64 ``histograms`` dataset in the output file (first axis:
    iteration, remaining axes: bins per dimension), submits one
    ``_remote_bin_iter`` task per iteration to the work manager, and stores each
    returned histogram in its row as results arrive.
    '''
    self.scan_data_shape()

    n_iters = self.iter_stop - self.iter_start
    hist_shape = (n_iters,) + tuple(len(edges) - 1 for edges in self.binbounds)
    histograms_ds = self.output_file.create_dataset(
        'histograms',
        dtype=np.float64,
        shape=hist_shape,
        compression=9 if self.compress_output else None,
    )

    # Hand the workers C-contiguous boundary arrays in the dataset's dtype.
    typed_bounds = [np.require(edges, self.dset_dtype, 'C') for edges in self.binbounds]

    self.progress.indicator.new_operation('Constructing histograms', self.iter_stop - self.iter_start)

    # ``initpoint`` is always passed as 0.
    tasks = (
        (_remote_bin_iter, (slot, n_iter, self.duration_dsspec, self.wt_dsspec, 0, typed_bounds, self.ignore_out_of_range), {})
        for (slot, n_iter) in enumerate(range(self.iter_start, self.iter_stop))
    )

    log.debug('max queue length: {!r}'.format(self.max_queue_len))
    for future in self.work_manager.submit_as_completed(tasks, self.max_queue_len):
        slot, _n_iter, hist = future.get_result(discard=True)
        self.progress.indicator.progress += 1

        # Results may arrive in any order; ``slot`` is the row to write.
        histograms_ds[slot] = hist
        del hist, future
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def entry_point():
    '''Create the ``WEDDist`` tool and run its ``main()`` method.'''
    tool = WEDDist()
    tool.main()


# Allow the module to be executed directly as a script.
if __name__ == '__main__':
    entry_point()
|