westpa 2022.10__cp312-cp312-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of westpa might be problematic. Click here for more details.

Files changed: 150
  1. westpa/__init__.py +14 -0
  2. westpa/_version.py +21 -0
  3. westpa/analysis/__init__.py +5 -0
  4. westpa/analysis/core.py +746 -0
  5. westpa/analysis/statistics.py +27 -0
  6. westpa/analysis/trajectories.py +360 -0
  7. westpa/cli/__init__.py +0 -0
  8. westpa/cli/core/__init__.py +0 -0
  9. westpa/cli/core/w_fork.py +152 -0
  10. westpa/cli/core/w_init.py +230 -0
  11. westpa/cli/core/w_run.py +77 -0
  12. westpa/cli/core/w_states.py +212 -0
  13. westpa/cli/core/w_succ.py +99 -0
  14. westpa/cli/core/w_truncate.py +59 -0
  15. westpa/cli/tools/__init__.py +0 -0
  16. westpa/cli/tools/ploterr.py +506 -0
  17. westpa/cli/tools/plothist.py +706 -0
  18. westpa/cli/tools/w_assign.py +596 -0
  19. westpa/cli/tools/w_bins.py +166 -0
  20. westpa/cli/tools/w_crawl.py +119 -0
  21. westpa/cli/tools/w_direct.py +547 -0
  22. westpa/cli/tools/w_dumpsegs.py +94 -0
  23. westpa/cli/tools/w_eddist.py +506 -0
  24. westpa/cli/tools/w_fluxanl.py +378 -0
  25. westpa/cli/tools/w_ipa.py +833 -0
  26. westpa/cli/tools/w_kinavg.py +127 -0
  27. westpa/cli/tools/w_kinetics.py +96 -0
  28. westpa/cli/tools/w_multi_west.py +414 -0
  29. westpa/cli/tools/w_ntop.py +213 -0
  30. westpa/cli/tools/w_pdist.py +515 -0
  31. westpa/cli/tools/w_postanalysis_matrix.py +82 -0
  32. westpa/cli/tools/w_postanalysis_reweight.py +53 -0
  33. westpa/cli/tools/w_red.py +486 -0
  34. westpa/cli/tools/w_reweight.py +780 -0
  35. westpa/cli/tools/w_select.py +226 -0
  36. westpa/cli/tools/w_stateprobs.py +111 -0
  37. westpa/cli/tools/w_trace.py +599 -0
  38. westpa/core/__init__.py +0 -0
  39. westpa/core/_rc.py +673 -0
  40. westpa/core/binning/__init__.py +55 -0
  41. westpa/core/binning/_assign.cpython-312-darwin.so +0 -0
  42. westpa/core/binning/assign.py +449 -0
  43. westpa/core/binning/binless.py +96 -0
  44. westpa/core/binning/binless_driver.py +54 -0
  45. westpa/core/binning/binless_manager.py +190 -0
  46. westpa/core/binning/bins.py +47 -0
  47. westpa/core/binning/mab.py +427 -0
  48. westpa/core/binning/mab_driver.py +54 -0
  49. westpa/core/binning/mab_manager.py +198 -0
  50. westpa/core/data_manager.py +1694 -0
  51. westpa/core/extloader.py +74 -0
  52. westpa/core/h5io.py +995 -0
  53. westpa/core/kinetics/__init__.py +24 -0
  54. westpa/core/kinetics/_kinetics.cpython-312-darwin.so +0 -0
  55. westpa/core/kinetics/events.py +147 -0
  56. westpa/core/kinetics/matrates.py +156 -0
  57. westpa/core/kinetics/rate_averaging.py +266 -0
  58. westpa/core/progress.py +218 -0
  59. westpa/core/propagators/__init__.py +54 -0
  60. westpa/core/propagators/executable.py +715 -0
  61. westpa/core/reweight/__init__.py +14 -0
  62. westpa/core/reweight/_reweight.cpython-312-darwin.so +0 -0
  63. westpa/core/reweight/matrix.py +126 -0
  64. westpa/core/segment.py +119 -0
  65. westpa/core/sim_manager.py +830 -0
  66. westpa/core/states.py +359 -0
  67. westpa/core/systems.py +93 -0
  68. westpa/core/textio.py +74 -0
  69. westpa/core/trajectory.py +330 -0
  70. westpa/core/we_driver.py +908 -0
  71. westpa/core/wm_ops.py +43 -0
  72. westpa/core/yamlcfg.py +391 -0
  73. westpa/fasthist/__init__.py +34 -0
  74. westpa/fasthist/__main__.py +110 -0
  75. westpa/fasthist/_fasthist.cpython-312-darwin.so +0 -0
  76. westpa/mclib/__init__.py +264 -0
  77. westpa/mclib/__main__.py +28 -0
  78. westpa/mclib/_mclib.cpython-312-darwin.so +0 -0
  79. westpa/oldtools/__init__.py +4 -0
  80. westpa/oldtools/aframe/__init__.py +35 -0
  81. westpa/oldtools/aframe/atool.py +75 -0
  82. westpa/oldtools/aframe/base_mixin.py +26 -0
  83. westpa/oldtools/aframe/binning.py +178 -0
  84. westpa/oldtools/aframe/data_reader.py +560 -0
  85. westpa/oldtools/aframe/iter_range.py +200 -0
  86. westpa/oldtools/aframe/kinetics.py +117 -0
  87. westpa/oldtools/aframe/mcbs.py +146 -0
  88. westpa/oldtools/aframe/output.py +39 -0
  89. westpa/oldtools/aframe/plotting.py +90 -0
  90. westpa/oldtools/aframe/trajwalker.py +126 -0
  91. westpa/oldtools/aframe/transitions.py +469 -0
  92. westpa/oldtools/cmds/__init__.py +0 -0
  93. westpa/oldtools/cmds/w_ttimes.py +358 -0
  94. westpa/oldtools/files.py +34 -0
  95. westpa/oldtools/miscfn.py +23 -0
  96. westpa/oldtools/stats/__init__.py +4 -0
  97. westpa/oldtools/stats/accumulator.py +35 -0
  98. westpa/oldtools/stats/edfs.py +129 -0
  99. westpa/oldtools/stats/mcbs.py +89 -0
  100. westpa/tools/__init__.py +33 -0
  101. westpa/tools/binning.py +472 -0
  102. westpa/tools/core.py +340 -0
  103. westpa/tools/data_reader.py +159 -0
  104. westpa/tools/dtypes.py +31 -0
  105. westpa/tools/iter_range.py +198 -0
  106. westpa/tools/kinetics_tool.py +340 -0
  107. westpa/tools/plot.py +283 -0
  108. westpa/tools/progress.py +17 -0
  109. westpa/tools/selected_segs.py +154 -0
  110. westpa/tools/wipi.py +751 -0
  111. westpa/trajtree/__init__.py +4 -0
  112. westpa/trajtree/_trajtree.cpython-312-darwin.so +0 -0
  113. westpa/trajtree/trajtree.py +117 -0
  114. westpa/westext/__init__.py +0 -0
  115. westpa/westext/adaptvoronoi/__init__.py +3 -0
  116. westpa/westext/adaptvoronoi/adaptVor_driver.py +214 -0
  117. westpa/westext/hamsm_restarting/__init__.py +3 -0
  118. westpa/westext/hamsm_restarting/example_overrides.py +35 -0
  119. westpa/westext/hamsm_restarting/restart_driver.py +1165 -0
  120. westpa/westext/stringmethod/__init__.py +11 -0
  121. westpa/westext/stringmethod/fourier_fitting.py +69 -0
  122. westpa/westext/stringmethod/string_driver.py +253 -0
  123. westpa/westext/stringmethod/string_method.py +306 -0
  124. westpa/westext/weed/BinCluster.py +180 -0
  125. westpa/westext/weed/ProbAdjustEquil.py +100 -0
  126. westpa/westext/weed/UncertMath.py +247 -0
  127. westpa/westext/weed/__init__.py +10 -0
  128. westpa/westext/weed/weed_driver.py +182 -0
  129. westpa/westext/wess/ProbAdjust.py +101 -0
  130. westpa/westext/wess/__init__.py +6 -0
  131. westpa/westext/wess/wess_driver.py +207 -0
  132. westpa/work_managers/__init__.py +57 -0
  133. westpa/work_managers/core.py +396 -0
  134. westpa/work_managers/environment.py +134 -0
  135. westpa/work_managers/mpi.py +318 -0
  136. westpa/work_managers/processes.py +187 -0
  137. westpa/work_managers/serial.py +28 -0
  138. westpa/work_managers/threads.py +79 -0
  139. westpa/work_managers/zeromq/__init__.py +20 -0
  140. westpa/work_managers/zeromq/core.py +641 -0
  141. westpa/work_managers/zeromq/node.py +131 -0
  142. westpa/work_managers/zeromq/work_manager.py +526 -0
  143. westpa/work_managers/zeromq/worker.py +320 -0
  144. westpa-2022.10.dist-info/AUTHORS +22 -0
  145. westpa-2022.10.dist-info/LICENSE +21 -0
  146. westpa-2022.10.dist-info/METADATA +183 -0
  147. westpa-2022.10.dist-info/RECORD +150 -0
  148. westpa-2022.10.dist-info/WHEEL +5 -0
  149. westpa-2022.10.dist-info/entry_points.txt +29 -0
  150. westpa-2022.10.dist-info/top_level.txt +1 -0
@@ -0,0 +1,515 @@
1
+ import logging
2
+
3
+ import h5py
4
+ import numpy as np
5
+
6
+ from westpa.tools import (
7
+ WESTParallelTool,
8
+ WESTDataReader,
9
+ WESTDSSynthesizer,
10
+ WESTWDSSynthesizer,
11
+ IterRangeSelection,
12
+ ProgressIndicatorComponent,
13
+ )
14
+
15
+ from westpa.fasthist import histnd, normhistnd
16
+ from westpa.core import h5io
17
+
18
+
19
+ log = logging.getLogger('w_pdist')
20
+
21
+
22
def isiterable(x):
    '''Return True if ``x`` supports the iterator protocol (i.e. ``iter(x)``
    succeeds), False otherwise.'''
    try:
        iter(x)
        return True
    except TypeError:
        return False
29
+
30
+
31
+ def _remote_min_max(ndim, dset_dtype, n_iter, dsspec):
32
+ try:
33
+ minval = np.finfo(dset_dtype).min
34
+ maxval = np.finfo(dset_dtype).max
35
+ except ValueError:
36
+ minval = np.iinfo(dset_dtype).min
37
+ maxval = np.iinfo(dset_dtype).max
38
+
39
+ data_range = [(maxval, minval) for _i in range(ndim)]
40
+
41
+ dset = dsspec.get_iter_data(n_iter)
42
+ for idim in range(ndim):
43
+ dimdata = dset[:, :, idim]
44
+ current_min, current_max = data_range[idim]
45
+ current_min = min(current_min, dimdata.min())
46
+ current_max = max(current_max, dimdata.max())
47
+ data_range[idim] = (current_min, current_max)
48
+ del dimdata
49
+ del dset
50
+ return data_range
51
+
52
+
53
def _remote_bin_iter(iiter, n_iter, dsspec, wt_dsspec, initpoint, binbounds, ignore_out_of_range):
    '''Worker task: histogram one iteration of data into ``binbounds`` and
    normalize the result.  ``initpoint`` timepoints are skipped at the start of
    the iteration.  Returns ``(iiter, n_iter, histogram)``.'''

    shape = tuple(len(bounds) - 1 for bounds in binbounds)
    hist = np.zeros(shape, dtype=np.float64)

    data = dsspec.get_iter_data(n_iter)
    n_timepoints = data.shape[1]
    weights = wt_dsspec.get_iter_data(n_iter)

    # drop the leading `initpoint` timepoints, then accumulate one timepoint
    # at a time into the shared output histogram
    data = data[:, initpoint:, :]
    for tp in range(n_timepoints - initpoint):
        histnd(data[:, tp, :], binbounds, weights, out=hist, binbound_check=False, ignore_out_of_range=ignore_out_of_range)

    del weights, data

    # normalize histogram
    normhistnd(hist, binbounds)
    return iiter, n_iter, hist
70
+
71
+
72
class WPDist(WESTParallelTool):
    # Tool metadata consumed by the WESTParallelTool CLI machinery: program
    # name and the long help text shown by ``w_pdist --help``.
    prog = 'w_pdist'
    description = '''\
Calculate time-resolved, multi-dimensional probability distributions of WE
datasets.


-----------------------------------------------------------------------------
Source data
-----------------------------------------------------------------------------

Source data is provided either by a user-specified function
(--construct-dataset) or a list of "data set specifications" (--dsspecs).
If neither is provided, the progress coordinate dataset ''pcoord'' is used.

To use a custom function to extract or calculate data whose probability
distribution will be calculated, specify the function in standard Python
MODULE.FUNCTION syntax as the argument to --construct-dataset. This function
will be called as function(n_iter,iter_group), where n_iter is the iteration
whose data are being considered and iter_group is the corresponding group
in the main WEST HDF5 file (west.h5). The function must return data which can
be indexed as [segment][timepoint][dimension].

To use a list of data set specifications, specify --dsspecs and then list the
desired datasets one-by-one (space-separated in most shells). These data set
specifications are formatted as NAME[,file=FILENAME,slice=SLICE], which will
use the dataset called NAME in the HDF5 file FILENAME (defaulting to the main
WEST HDF5 file west.h5), and slice it with the Python slice expression SLICE
(as in [0:2] to select the first two elements of the first axis of the
dataset). The ``slice`` option is most useful for selecting one column (or
more) from a multi-column dataset, such as arises when using a progress
coordinate of multiple dimensions.


-----------------------------------------------------------------------------
Histogram binning
-----------------------------------------------------------------------------

By default, histograms are constructed with 100 bins in each dimension. This
can be overridden by specifying -b/--bins, which accepts a number of different
kinds of arguments:

a single integer N
N uniformly spaced bins will be used in each dimension.

a sequence of integers N1,N2,... (comma-separated)
N1 uniformly spaced bins will be used for the first dimension, N2 for the
second, and so on.

a list of lists [[B11, B12, B13, ...], [B21, B22, B23, ...], ...]
The bin boundaries B11, B12, B13, ... will be used for the first dimension,
B21, B22, B23, ... for the second dimension, and so on. These bin
boundaries need not be uniformly spaced. These expressions will be
evaluated with Python's ``eval`` construct, with ``np`` available for
use [e.g. to specify bins using np.arange()].

The first two forms (integer, list of integers) will trigger a scan of all
data in each dimension in order to determine the minimum and maximum values,
which may be very expensive for large datasets. This can be avoided by
explicitly providing bin boundaries using the list-of-lists form.

Note that these bins are *NOT* at all related to the bins used to drive WE
sampling.


-----------------------------------------------------------------------------
Output format
-----------------------------------------------------------------------------

The output file produced (specified by -o/--output, defaulting to "pdist.h5")
may be fed to plothist to generate plots (or appropriately processed text or
HDF5 files) from this data. In short, the following datasets are created:

``histograms``
Normalized histograms. The first axis corresponds to iteration, and
remaining axes correspond to dimensions of the input dataset.

``/binbounds_0``
Vector of bin boundaries for the first (index 0) dimension. Additional
datasets similarly named (/binbounds_1, /binbounds_2, ...) are created
for additional dimensions.

``/midpoints_0``
Vector of bin midpoints for the first (index 0) dimension. Additional
datasets similarly named are created for additional dimensions.

``n_iter``
Vector of iteration numbers corresponding to the stored histograms (i.e.
the first axis of the ``histograms`` dataset).


-----------------------------------------------------------------------------
Subsequent processing
-----------------------------------------------------------------------------

The output generated by this program (-o/--output, default "pdist.h5") may be
plotted by the ``plothist`` program. See ``plothist --help`` for more
information.


-----------------------------------------------------------------------------
Parallelization
-----------------------------------------------------------------------------

This tool supports parallelized binning, including reading of input data.
Parallel processing is the default. For simple cases (reading pre-computed
input data, modest numbers of segments), serial processing (--serial) may be
more efficient.


-----------------------------------------------------------------------------
Command-line options
-----------------------------------------------------------------------------

'''
187
+
188
    def __init__(self):
        """Initialize tool components (data reader, dataset synthesizers,
        iteration-range selector) and the histogram-generation state that
        ``go()`` fills in later."""
        super().__init__()

        # Parallel processing by default (this is not actually necessary, but it is
        # informative!)
        self.wm_env.default_work_manager = self.wm_env.default_parallel_work_manager

        # These are used throughout
        self.progress = ProgressIndicatorComponent()
        self.data_reader = WESTDataReader()
        # input data defaults to the progress coordinate; weights default to
        # the per-segment index table
        self.input_dssynth = WESTDSSynthesizer(default_dsname='pcoord')
        self.input_wdssynth = WESTWDSSynthesizer(default_dsname='seg_index')
        self.iter_range = IterRangeSelection(self.data_reader)
        self.iter_range.include_args['iter_step'] = False
        self.binspec = None  # raw --bins expression (string)
        self.output_filename = None
        self.output_file = None  # h5py.File opened in go()

        self.dsspec = None
        self.wt_dsspec = None  # dsspec for weights

        # These are used during histogram generation only
        self.iter_start = None
        self.iter_stop = None
        self.ndim = None
        self.ntimepoints = None
        self.dset_dtype = None
        self.binbounds = None  # bin boundaries for each dimension
        self.midpoints = None  # bin midpoints for each dimension
        self.data_range = None  # data range for each dimension, as the pairs (min,max)
        self.ignore_out_of_range = False
        self.compress_output = False
220
+
221
    def add_args(self, parser):
        '''Register command-line arguments: data-reader and iteration-range
        options, binning/output/compression flags, and the mutually exclusive
        input-dataset and weight-dataset option groups.'''
        self.data_reader.add_args(parser)

        self.iter_range.add_args(parser)

        parser.add_argument(
            '-b',
            '--bins',
            dest='bins',
            metavar='BINEXPR',
            default='100',
            help='''Use BINEXPR for bins. This may be an integer, which will be used for each
dimension of the progress coordinate; a list of integers (formatted as [n1,n2,...])
which will use n1 bins for the first dimension, n2 for the second dimension, and so on;
or a list of lists of boundaries (formatted as [[a1, a2, ...], [b1, b2, ...], ... ]), which
will use [a1, a2, ...] as bin boundaries for the first dimension, [b1, b2, ...] as bin boundaries
for the second dimension, and so on. (Default: 100 bins in each dimension.)''',
        )

        parser.add_argument(
            '-o', '--output', dest='output', default='pdist.h5', help='''Store results in OUTPUT (default: %(default)s).'''
        )
        parser.add_argument(
            '-C',
            '--compress',
            action='store_true',
            help='''Compress histograms. May make storage of higher-dimensional histograms
more tractable, at the (possible extreme) expense of increased analysis time.
(Default: no compression.)''',
        )

        parser.add_argument(
            '--loose',
            dest='ignore_out_of_range',
            action='store_true',
            help='''Ignore values that do not fall within bins. (Risky, as this can make buggy bin
boundaries appear as reasonable data. Only use if you are
sure of your bin boundary specification.)''',
        )

        # --construct-dataset and --dsspecs are alternative ways to specify
        # the input data, so they are mutually exclusive
        igroup = parser.add_argument_group('input dataset options').add_mutually_exclusive_group(required=False)

        igroup.add_argument(
            '--construct-dataset',
            help='''Use the given function (as in module.function) to extract source data.
This function will be called once per iteration as function(n_iter, iter_group)
to construct data for one iteration. Data returned must be indexable as
[seg_id][timepoint][dimension]''',
        )

        igroup.add_argument(
            '--dsspecs', nargs='+', metavar='DSSPEC', help='''Construct probability distribution from one or more DSSPECs.'''
        )

        wgroup = parser.add_argument_group('input weight dataset options').add_mutually_exclusive_group(required=False)
        wgroup.add_argument(
            '--construct-wdataset',
            help='''Use the given function (as in module.function) to extract weight data.
This function will be called once per iteration as function(n_iter, iter_group)
to construct data for one iteration. Data returned must be indexable as
[seg_id]''',
        )

        self.progress.add_args(parser)
285
+
286
    def process_args(self, args):
        '''Digest parsed arguments into tool state: dataset/weight dsspecs,
        iteration range, bin specification, output filename and flags.'''
        self.progress.process_args(args)
        self.data_reader.process_args(args)
        self.input_dssynth.h5filename = self.data_reader.we_h5filename
        self.input_dssynth.process_args(args)
        self.dsspec = self.input_dssynth.dsspec

        # Carrying an open HDF5 file across a fork() seems to corrupt the entire HDF5 library
        # Open the WEST HDF5 file just long enough to process our iteration range, then close
        # and reopen in go() [which executes after the fork]
        with self.data_reader:
            self.iter_range.process_args(args)

        # Reading potential custom weights
        self.input_wdssynth.h5filename = self.data_reader.we_h5filename
        self.input_wdssynth.process_args(args)
        self.wt_dsspec = self.input_wdssynth.dsspec

        self.binspec = args.bins
        self.output_filename = args.output
        self.ignore_out_of_range = bool(args.ignore_out_of_range)
        # args.compress comes from store_true so it is already a bool; the
        # 'or False' presumably guards a None default -- historical artifact
        self.compress_output = args.compress or False
308
+
309
    def go(self):
        '''Main work: open the WEST data file, construct bin boundaries,
        histogram every iteration in the selected range, and write the
        results (histograms, bin boundaries/midpoints, iteration numbers)
        to the output HDF5 file.'''
        self.data_reader.open('r')
        pi = self.progress.indicator
        pi.operation = 'Initializing'
        with pi:
            self.output_file = h5py.File(self.output_filename, 'w')
            h5io.stamp_creator_data(self.output_file)

            self.iter_start = self.iter_range.iter_start
            self.iter_stop = self.iter_range.iter_stop

            # Construct bin boundaries
            self.construct_bins(self.parse_binspec(self.binspec))
            for idim, (binbounds, midpoints) in enumerate(zip(self.binbounds, self.midpoints)):
                # one boundary/midpoint vector per dimension
                self.output_file['binbounds_{}'.format(idim)] = binbounds
                self.output_file['midpoints_{}'.format(idim)] = midpoints

            # construct histogram
            self.construct_histogram()

            # Record iteration range
            iter_range = self.iter_range.iter_range()
            self.output_file['n_iter'] = iter_range
            self.iter_range.record_data_iter_range(self.output_file['histograms'])

            self.output_file.close()
335
+
336
+ @staticmethod
337
+ def parse_binspec(binspec):
338
+ namespace = {'numpy': np, 'np': np, 'inf': float('inf')}
339
+
340
+ try:
341
+ binspec_compiled = eval(binspec, namespace)
342
+ except Exception as e:
343
+ raise ValueError('invalid bin specification: {!r}'.format(e))
344
+ else:
345
+ if log.isEnabledFor(logging.DEBUG):
346
+ log.debug('bin specs: {!r}'.format(binspec_compiled))
347
+ return binspec_compiled
348
+
349
+ def construct_bins(self, bins):
350
+ '''
351
+ Construct bins according to ``bins``, which may be:
352
+
353
+ 1) A scalar integer (for that number of bins in each dimension)
354
+ 2) A sequence of integers (specifying number of bins for each dimension)
355
+ 3) A sequence of sequences of bin boundaries (specifying boundaries for each dimension)
356
+
357
+ Sets ``self.binbounds`` to a list of arrays of bin boundaries appropriate for passing to
358
+ fasthist.histnd, along with ``self.midpoints`` to the midpoints of the bins.
359
+ '''
360
+
361
+ if not isiterable(bins):
362
+ self._construct_bins_from_scalar(bins)
363
+ elif not isiterable(bins[0]):
364
+ self._construct_bins_from_int_seq(bins)
365
+ else:
366
+ self._construct_bins_from_bound_seqs(bins)
367
+
368
+ if log.isEnabledFor(logging.DEBUG):
369
+ log.debug('binbounds: {!r}'.format(self.binbounds))
370
+
371
+ def scan_data_shape(self):
372
+ if self.ndim is None:
373
+ dset = self.dsspec.get_iter_data(self.iter_start)
374
+ self.ntimepoints = dset.shape[1]
375
+ self.ndim = dset.shape[2]
376
+ self.dset_dtype = dset.dtype
377
+
378
    def scan_data_range(self):
        '''Scan input data for range in each dimension. The number of dimensions is determined
        from the shape of the progress coordinate as of self.iter_start.'''

        self.progress.indicator.new_operation('Scanning for data range', self.iter_stop - self.iter_start)
        self.scan_data_shape()

        # bind to locals once; these are shipped to worker tasks
        dset_dtype = self.dset_dtype
        ndim = self.ndim
        dsspec = self.dsspec

        # Widest representable values for this dtype serve as starting
        # sentinels: finfo for float dtypes, iinfo for integer dtypes.
        try:
            minval = np.finfo(dset_dtype).min
            maxval = np.finfo(dset_dtype).max
        except ValueError:
            minval = np.iinfo(dset_dtype).min
            maxval = np.iinfo(dset_dtype).max

        # Each dimension starts as the inverted pair (max, min) so real data
        # immediately tightens both ends.
        data_range = self.data_range = [(maxval, minval) for _i in range(self.ndim)]

        # futures = []
        # for n_iter in xrange(self.iter_start, self.iter_stop):
        # _remote_min_max(ndim, dset_dtype, n_iter, dsspec)
        # futures.append(self.work_manager.submit(_remote_min_max, args=(ndim, dset_dtype, n_iter, dsspec)))

        # for future in self.work_manager.as_completed(futures):
        # Fan one _remote_min_max task per iteration out to the work manager,
        # merging per-iteration ranges as results arrive (in any order).
        for future in self.work_manager.submit_as_completed(
            ((_remote_min_max, (ndim, dset_dtype, n_iter, dsspec), {}) for n_iter in range(self.iter_start, self.iter_stop)),
            self.max_queue_len,
        ):
            bounds = future.get_result(discard=True)
            for idim in range(ndim):
                current_min, current_max = data_range[idim]
                current_min = min(current_min, bounds[idim][0])
                current_max = max(current_max, bounds[idim][1])
                data_range[idim] = (current_min, current_max)
            self.progress.indicator.progress += 1
415
+
416
+ def _construct_bins_from_scalar(self, bins):
417
+ if self.data_range is None:
418
+ self.scan_data_range()
419
+
420
+ self.binbounds = []
421
+ self.midpoints = []
422
+ for idim in range(self.ndim):
423
+ lb, ub = self.data_range[idim]
424
+ # Advance just beyond the upper bound of the range, so that we catch
425
+ # the maximum in the histogram
426
+ if ub > 0:
427
+ ub *= 1.01
428
+ else:
429
+ ub /= 1.01
430
+
431
+ boundset = np.linspace(lb, ub, bins + 1)
432
+ midpoints = (boundset[:-1] + boundset[1:]) / 2.0
433
+ self.binbounds.append(boundset)
434
+ self.midpoints.append(midpoints)
435
+
436
+ def _construct_bins_from_int_seq(self, bins):
437
+ if self.data_range is None:
438
+ self.scan_data_range()
439
+
440
+ self.binbounds = []
441
+ self.midpoints = []
442
+ for idim in range(self.ndim):
443
+ lb, ub = self.data_range[idim]
444
+ # Advance just beyond the upper bound of the range, so that we catch
445
+ # the maximum in the histogram
446
+ if ub > 0:
447
+ ub *= 1.01
448
+ else:
449
+ ub /= 1.01
450
+
451
+ boundset = np.linspace(lb, ub, bins[idim] + 1)
452
+ midpoints = (boundset[:-1] + boundset[1:]) / 2.0
453
+ self.binbounds.append(boundset)
454
+ self.midpoints.append(midpoints)
455
+
456
+ def _construct_bins_from_bound_seqs(self, bins):
457
+ self.binbounds = []
458
+ self.midpoints = []
459
+ for boundset in bins:
460
+ boundset = np.asarray(boundset)
461
+ if (np.diff(boundset) <= 0).any():
462
+ raise ValueError('boundary set {!r} is not strictly monotonically increasing'.format(boundset))
463
+ self.binbounds.append(boundset)
464
+ self.midpoints.append((boundset[:-1] + boundset[1:]) / 2.0)
465
+
466
    def construct_histogram(self):
        '''Construct a histogram using bins previously constructed with ``construct_bins()``.
        The time series of histogram values is stored in ``histograms``.
        Each histogram in the time series is normalized.'''

        self.scan_data_shape()

        iter_count = self.iter_stop - self.iter_start
        # one histogram per iteration; trailing axes follow the bin layout
        histograms_ds = self.output_file.create_dataset(
            'histograms',
            dtype=np.float64,
            shape=((iter_count,) + tuple(len(bounds) - 1 for bounds in self.binbounds)),
            compression=9 if self.compress_output else None,
        )
        # C-contiguous boundary arrays matching the data dtype, as required
        # by the fasthist worker
        binbounds = [np.require(boundset, self.dset_dtype, 'C') for boundset in self.binbounds]

        self.progress.indicator.new_operation('Constructing histograms', self.iter_stop - self.iter_start)
        # One _remote_bin_iter task per iteration.  initpoint is 1 after the
        # first iteration -- presumably so the timepoint shared with the
        # previous iteration is not counted twice (TODO confirm).
        task_gen = (
            (
                _remote_bin_iter,
                (iiter, n_iter, self.dsspec, self.wt_dsspec, 1 if iiter > 0 else 0, binbounds, self.ignore_out_of_range),
                {},
            )
            for (iiter, n_iter) in enumerate(range(self.iter_start, self.iter_stop))
        )
        # futures = set()
        # for iiter, n_iter in enumerate(xrange(self.iter_start, self.iter_stop)):
        # initpoint = 1 if iiter > 0 else 0
        # futures.add(self.work_manager.submit(_remote_bin_iter,
        # args=(iiter, n_iter, self.dsspec, self.wt_dsspec, initpoint, binbounds)))

        # for future in self.work_manager.as_completed(futures):
        # future = self.work_manager.wait_any(futures)
        # for future in self.work_manager.submit_as_completed(task_gen, self.queue_size):
        log.debug('max queue length: {!r}'.format(self.max_queue_len))
        for future in self.work_manager.submit_as_completed(task_gen, self.max_queue_len):
            iiter, n_iter, iter_hist = future.get_result(discard=True)
            self.progress.indicator.progress += 1

            # store histogram
            histograms_ds[iiter] = iter_hist
            del iter_hist, future
508
+
509
+
510
def entry_point():
    '''Console-script entry point for ``w_pdist``.'''
    WPDist().main()


if __name__ == '__main__':
    entry_point()
@@ -0,0 +1,82 @@
1
+ from westpa.tools import WESTMasterCommand, WESTParallelTool
2
+ from warnings import warn
3
+
4
+ from westpa.cli.tools.w_reweight import RWMatrix
5
+
6
+ # Just a shim to make sure everything works and is backwards compatible.
7
+ # We're making sure it has the appropriate functions so that it can be called
8
+ # as a regular tool, and not a subcommand.
9
+
10
+
11
class PAMatrix(RWMatrix):
    # Backwards-compatibility shim: exposes the 'init' subcommand of
    # w_reweight under the old w_postanalysis_matrix tool name.
    subcommand = 'init'
    help_text = 'averages and CIs for path-tracing kinetics analysis'
    default_output_file = 'flux_matrices.h5'
    # This isn't strictly necessary, but for the moment, here it is.
    # We really need to modify the underlying class so that we don't pull this sort of stuff if it isn't necessary.
    # That'll take some case handling, which is fine.
    # default_kinetics_file = 'assign.h5'
19
+
20
+
21
class WReweight(WESTMasterCommand, WESTParallelTool):
    # Master command wrapping the single PAMatrix subcommand so that this
    # deprecated tool behaves like ``w_reweight init``.
    prog = 'w_postanalysis_matrix'
    subcommands = [PAMatrix]
    subparsers_title = 'calculate state-to-state kinetics by tracing trajectories'
    description = '''\
Generate a colored transition matrix from a WE assignment file. The subsequent
analysis requires that the assignments are calculated using only the initial and
final time points of each trajectory segment. This may require downsampling the
h5file generated by a WE simulation. In the future w_assign may be enhanced to optionally
generate the necessary assignment file from a h5file with intermediate time points.
Additionally, this analysis is currently only valid on simulations performed under
either equilibrium or steady-state conditions without recycling target states.

-----------------------------------------------------------------------------
Output format
-----------------------------------------------------------------------------

The output file (-o/--output, by default "reweight.h5") contains the
following datasets:

``/bin_populations`` [window, bin]
The reweighted populations of each bin based on windows. Bins contain
one color each, so to recover the original un-colored spatial bins,
one must sum over all states.

``/iterations`` [iteration]
*(Structured -- see below)* Sparse matrix data from each
iteration. They are reconstructed and averaged within the
w_reweight {kinetics/probs} routines so that observables may
be calculated. Each group contains 4 vectors of data:

flux
*(Floating-point)* The weight of a series of flux events
cols
*(Integer)* The bin from which a flux event began.
cols
*(Integer)* The bin into which the walker fluxed.
obs
*(Integer)* How many flux events were observed during this
iteration.

-----------------------------------------------------------------------------
Command-line options
-----------------------------------------------------------------------------
'''
    # NOTE(review): the help text above lists 'cols' twice; the second entry
    # presumably should read 'rows' (the 4 vectors are flux/cols/rows/obs) --
    # verify against the w_reweight output schema before changing the string.
66
+
67
+
68
def entry_point():
    '''Deprecated console-script entry point; forwards to the ``init``
    subcommand of :class:`WReweight` (equivalent to ``w_reweight init``).'''
    warn('{} is being deprecated. Please use w_reweight instead.'.format(WReweight.prog))
    # If we're not really supporting subcommands, force the 'init' subcommand
    # onto the command line so the master-command machinery dispatches to it.
    import sys

    try:
        if sys.argv[1] != 'init':
            sys.argv.insert(1, 'init')
    except IndexError:
        # no arguments at all -- just add the subcommand.  (Previously this
        # caught bare Exception, which could mask unrelated errors; only a
        # missing argv[1] is expected here.)
        sys.argv.insert(1, 'init')
    WReweight().main()


if __name__ == '__main__':
    entry_point()
@@ -0,0 +1,53 @@
1
+ from westpa.tools import WESTMasterCommand, WESTParallelTool
2
+ from warnings import warn
3
+
4
+ from westpa.cli.tools.w_reweight import RWAverage
5
+
6
+ # Just a shim to make sure everything works and is backwards compatible.
7
+ # We're making sure it has the appropriate functions so that it can be called
8
+ # as a regular tool, and not a subcommand.
9
+
10
+
11
class PAAverage(RWAverage):
    # Backwards-compatibility shim: exposes the 'average' subcommand of
    # w_reweight under the old w_postanalysis_reweight tool name.
    subcommand = 'average'
    help_text = ''
    default_output_file = 'kinrw.h5'
    # This isn't strictly necessary, but for the moment, here it is.
    # We really need to modify the underlying class so that we don't pull this sort of stuff if it isn't necessary.
    # That'll take some case handling, which is fine.
    default_kinetics_file = 'flux_matrices.h5'
19
+
20
+
21
class WReweight(WESTMasterCommand, WESTParallelTool):
    # Master command wrapping the single PAAverage subcommand so that this
    # deprecated tool behaves like ``w_reweight average``.
    prog = 'w_postanalysis_reweight'
    subcommands = [PAAverage]
    subparsers_title = 'calculate state-to-state kinetics by tracing trajectories'
    description = '''\
A convenience function to run kinetics/probs. Bin assignments,
including macrostate definitions, are required. (See
"w_assign --help" for more information).

For more information on the individual subcommands this subs in for, run
w_reweight {kinetics/probs} --help.

-----------------------------------------------------------------------------
Command-line options
-----------------------------------------------------------------------------
'''
37
+
38
+
39
def entry_point():
    '''Deprecated console-script entry point; forwards to the ``average``
    subcommand of :class:`WReweight` (equivalent to ``w_reweight average``).'''
    warn('{} is being deprecated. Please use w_reweight instead.'.format(WReweight.prog))
    # If we're not really supporting subcommands, force the 'average'
    # subcommand onto the command line so dispatch finds it.
    import sys

    try:
        if sys.argv[1] != 'average':
            sys.argv.insert(1, 'average')
    except IndexError:
        # no arguments at all -- just add the subcommand.  (Previously this
        # caught bare Exception, which could mask unrelated errors; only a
        # missing argv[1] is expected here.)
        sys.argv.insert(1, 'average')
    WReweight().main()


if __name__ == '__main__':
    entry_point()