westpa 2022.13__cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. westpa/__init__.py +14 -0
  2. westpa/_version.py +21 -0
  3. westpa/analysis/__init__.py +5 -0
  4. westpa/analysis/core.py +749 -0
  5. westpa/analysis/statistics.py +27 -0
  6. westpa/analysis/trajectories.py +369 -0
  7. westpa/cli/__init__.py +0 -0
  8. westpa/cli/core/__init__.py +0 -0
  9. westpa/cli/core/w_fork.py +152 -0
  10. westpa/cli/core/w_init.py +230 -0
  11. westpa/cli/core/w_run.py +77 -0
  12. westpa/cli/core/w_states.py +212 -0
  13. westpa/cli/core/w_succ.py +99 -0
  14. westpa/cli/core/w_truncate.py +68 -0
  15. westpa/cli/tools/__init__.py +0 -0
  16. westpa/cli/tools/ploterr.py +506 -0
  17. westpa/cli/tools/plothist.py +706 -0
  18. westpa/cli/tools/w_assign.py +597 -0
  19. westpa/cli/tools/w_bins.py +166 -0
  20. westpa/cli/tools/w_crawl.py +119 -0
  21. westpa/cli/tools/w_direct.py +557 -0
  22. westpa/cli/tools/w_dumpsegs.py +94 -0
  23. westpa/cli/tools/w_eddist.py +506 -0
  24. westpa/cli/tools/w_fluxanl.py +376 -0
  25. westpa/cli/tools/w_ipa.py +832 -0
  26. westpa/cli/tools/w_kinavg.py +127 -0
  27. westpa/cli/tools/w_kinetics.py +96 -0
  28. westpa/cli/tools/w_multi_west.py +414 -0
  29. westpa/cli/tools/w_ntop.py +213 -0
  30. westpa/cli/tools/w_pdist.py +515 -0
  31. westpa/cli/tools/w_postanalysis_matrix.py +82 -0
  32. westpa/cli/tools/w_postanalysis_reweight.py +53 -0
  33. westpa/cli/tools/w_red.py +491 -0
  34. westpa/cli/tools/w_reweight.py +780 -0
  35. westpa/cli/tools/w_select.py +226 -0
  36. westpa/cli/tools/w_stateprobs.py +111 -0
  37. westpa/cli/tools/w_timings.py +113 -0
  38. westpa/cli/tools/w_trace.py +599 -0
  39. westpa/core/__init__.py +0 -0
  40. westpa/core/_rc.py +673 -0
  41. westpa/core/binning/__init__.py +55 -0
  42. westpa/core/binning/_assign.c +36018 -0
  43. westpa/core/binning/_assign.cpython-312-aarch64-linux-gnu.so +0 -0
  44. westpa/core/binning/_assign.pyx +370 -0
  45. westpa/core/binning/assign.py +454 -0
  46. westpa/core/binning/binless.py +96 -0
  47. westpa/core/binning/binless_driver.py +54 -0
  48. westpa/core/binning/binless_manager.py +189 -0
  49. westpa/core/binning/bins.py +47 -0
  50. westpa/core/binning/mab.py +506 -0
  51. westpa/core/binning/mab_driver.py +54 -0
  52. westpa/core/binning/mab_manager.py +197 -0
  53. westpa/core/data_manager.py +1761 -0
  54. westpa/core/extloader.py +74 -0
  55. westpa/core/h5io.py +1079 -0
  56. westpa/core/kinetics/__init__.py +24 -0
  57. westpa/core/kinetics/_kinetics.c +45174 -0
  58. westpa/core/kinetics/_kinetics.cpython-312-aarch64-linux-gnu.so +0 -0
  59. westpa/core/kinetics/_kinetics.pyx +815 -0
  60. westpa/core/kinetics/events.py +147 -0
  61. westpa/core/kinetics/matrates.py +156 -0
  62. westpa/core/kinetics/rate_averaging.py +266 -0
  63. westpa/core/progress.py +218 -0
  64. westpa/core/propagators/__init__.py +54 -0
  65. westpa/core/propagators/executable.py +592 -0
  66. westpa/core/propagators/loaders.py +196 -0
  67. westpa/core/reweight/__init__.py +14 -0
  68. westpa/core/reweight/_reweight.c +36899 -0
  69. westpa/core/reweight/_reweight.cpython-312-aarch64-linux-gnu.so +0 -0
  70. westpa/core/reweight/_reweight.pyx +439 -0
  71. westpa/core/reweight/matrix.py +126 -0
  72. westpa/core/segment.py +119 -0
  73. westpa/core/sim_manager.py +839 -0
  74. westpa/core/states.py +359 -0
  75. westpa/core/systems.py +93 -0
  76. westpa/core/textio.py +74 -0
  77. westpa/core/trajectory.py +603 -0
  78. westpa/core/we_driver.py +910 -0
  79. westpa/core/wm_ops.py +43 -0
  80. westpa/core/yamlcfg.py +298 -0
  81. westpa/fasthist/__init__.py +34 -0
  82. westpa/fasthist/_fasthist.c +38755 -0
  83. westpa/fasthist/_fasthist.cpython-312-aarch64-linux-gnu.so +0 -0
  84. westpa/fasthist/_fasthist.pyx +222 -0
  85. westpa/mclib/__init__.py +271 -0
  86. westpa/mclib/__main__.py +28 -0
  87. westpa/mclib/_mclib.c +34610 -0
  88. westpa/mclib/_mclib.cpython-312-aarch64-linux-gnu.so +0 -0
  89. westpa/mclib/_mclib.pyx +226 -0
  90. westpa/oldtools/__init__.py +4 -0
  91. westpa/oldtools/aframe/__init__.py +35 -0
  92. westpa/oldtools/aframe/atool.py +75 -0
  93. westpa/oldtools/aframe/base_mixin.py +26 -0
  94. westpa/oldtools/aframe/binning.py +178 -0
  95. westpa/oldtools/aframe/data_reader.py +560 -0
  96. westpa/oldtools/aframe/iter_range.py +200 -0
  97. westpa/oldtools/aframe/kinetics.py +117 -0
  98. westpa/oldtools/aframe/mcbs.py +153 -0
  99. westpa/oldtools/aframe/output.py +39 -0
  100. westpa/oldtools/aframe/plotting.py +88 -0
  101. westpa/oldtools/aframe/trajwalker.py +126 -0
  102. westpa/oldtools/aframe/transitions.py +469 -0
  103. westpa/oldtools/cmds/__init__.py +0 -0
  104. westpa/oldtools/cmds/w_ttimes.py +361 -0
  105. westpa/oldtools/files.py +34 -0
  106. westpa/oldtools/miscfn.py +23 -0
  107. westpa/oldtools/stats/__init__.py +4 -0
  108. westpa/oldtools/stats/accumulator.py +35 -0
  109. westpa/oldtools/stats/edfs.py +129 -0
  110. westpa/oldtools/stats/mcbs.py +96 -0
  111. westpa/tools/__init__.py +33 -0
  112. westpa/tools/binning.py +472 -0
  113. westpa/tools/core.py +340 -0
  114. westpa/tools/data_reader.py +159 -0
  115. westpa/tools/dtypes.py +31 -0
  116. westpa/tools/iter_range.py +198 -0
  117. westpa/tools/kinetics_tool.py +343 -0
  118. westpa/tools/plot.py +283 -0
  119. westpa/tools/progress.py +17 -0
  120. westpa/tools/selected_segs.py +154 -0
  121. westpa/tools/wipi.py +751 -0
  122. westpa/trajtree/__init__.py +4 -0
  123. westpa/trajtree/_trajtree.c +17829 -0
  124. westpa/trajtree/_trajtree.cpython-312-aarch64-linux-gnu.so +0 -0
  125. westpa/trajtree/_trajtree.pyx +130 -0
  126. westpa/trajtree/trajtree.py +117 -0
  127. westpa/westext/__init__.py +0 -0
  128. westpa/westext/adaptvoronoi/__init__.py +3 -0
  129. westpa/westext/adaptvoronoi/adaptVor_driver.py +214 -0
  130. westpa/westext/hamsm_restarting/__init__.py +3 -0
  131. westpa/westext/hamsm_restarting/example_overrides.py +35 -0
  132. westpa/westext/hamsm_restarting/restart_driver.py +1165 -0
  133. westpa/westext/stringmethod/__init__.py +11 -0
  134. westpa/westext/stringmethod/fourier_fitting.py +69 -0
  135. westpa/westext/stringmethod/string_driver.py +253 -0
  136. westpa/westext/stringmethod/string_method.py +306 -0
  137. westpa/westext/weed/BinCluster.py +180 -0
  138. westpa/westext/weed/ProbAdjustEquil.py +100 -0
  139. westpa/westext/weed/UncertMath.py +247 -0
  140. westpa/westext/weed/__init__.py +10 -0
  141. westpa/westext/weed/weed_driver.py +192 -0
  142. westpa/westext/wess/ProbAdjust.py +101 -0
  143. westpa/westext/wess/__init__.py +6 -0
  144. westpa/westext/wess/wess_driver.py +217 -0
  145. westpa/work_managers/__init__.py +57 -0
  146. westpa/work_managers/core.py +396 -0
  147. westpa/work_managers/environment.py +134 -0
  148. westpa/work_managers/mpi.py +318 -0
  149. westpa/work_managers/processes.py +201 -0
  150. westpa/work_managers/serial.py +28 -0
  151. westpa/work_managers/threads.py +79 -0
  152. westpa/work_managers/zeromq/__init__.py +20 -0
  153. westpa/work_managers/zeromq/core.py +635 -0
  154. westpa/work_managers/zeromq/node.py +131 -0
  155. westpa/work_managers/zeromq/work_manager.py +526 -0
  156. westpa/work_managers/zeromq/worker.py +320 -0
  157. westpa-2022.13.dist-info/METADATA +179 -0
  158. westpa-2022.13.dist-info/RECORD +162 -0
  159. westpa-2022.13.dist-info/WHEEL +7 -0
  160. westpa-2022.13.dist-info/entry_points.txt +30 -0
  161. westpa-2022.13.dist-info/licenses/LICENSE +21 -0
  162. westpa-2022.13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,506 @@
1
+ import logging
2
+
3
+ import h5py
4
+ import numpy as np
5
+
6
+ from westpa.tools import WESTParallelTool, ProgressIndicatorComponent
7
+ from westpa.fasthist import histnd, normhistnd
8
+ from westpa.core import h5io
9
+
10
+ log = logging.getLogger('w_eddist')
11
+
12
+
13
class DurationDataset:
    """Facade for a 'dsspec'-style object that applies a mask inside ``get_iter_data``.

    ``dataset`` is indexed by ``n_iter - 1`` (iterations are 1-based), while
    ``mask`` is indexed by ``n_iter - iter_start``; i.e. row 0 of ``mask``
    belongs to iteration ``iter_start``.
    """

    def __init__(self, dataset, mask, iter_start=1):
        self.dataset = dataset
        self.mask = mask
        self.dtype = dataset.dtype
        self.iter_start = iter_start

    def get_iter_data(self, n_iter):
        """Return the masked entries of iteration ``n_iter``.

        Returns:
            ``None`` when the mask selects nothing for this iteration, otherwise
            the selected entries reshaped to ``(nsegs, 1, 1)``.

        Raises:
            ValueError: if ``n_iter`` lies outside the iteration range covered by
                the dataset/mask pair.
        """
        message = "Iteration {} is not within the iteration range".format(n_iter)

        # Explicit check rather than ``assert``: assertions are removed under
        # ``python -O``, after which a too-small ``n_iter`` would produce a
        # negative index that silently wraps around to another iteration.
        if n_iter < self.iter_start or n_iter < 1:
            raise ValueError(message)

        try:
            selected = self.dataset[n_iter - 1][self.mask[n_iter - self.iter_start]]
        except IndexError as err:
            raise ValueError(message) from err

        nsegs = selected.shape[0]
        if nsegs == 0:
            return None
        return selected.reshape(nsegs, 1, 1)
33
+
34
+
35
def isiterable(x):
    """Report whether ``iter(x)`` succeeds (``True``) or raises ``TypeError`` (``False``)."""
    try:
        iter(x)
        return True
    except TypeError:
        return False
42
+
43
+
44
def _remote_min_max(ndim, dset_dtype, n_iter, dsspec):
    """Compute the per-dimension ``(min, max)`` of one iteration's data.

    The data comes from ``dsspec.get_iter_data(n_iter)`` and is indexed as
    ``[:, :, idim]``.  Each pair starts from the inverted sentinel
    ``(dtype max, dtype min)``; when the iteration has no data, a list of those
    sentinels is returned unchanged.
    """
    # np.finfo raises ValueError for non-floating dtypes; use np.iinfo then.
    try:
        limits = np.finfo(dset_dtype)
        floor, ceiling = limits.min, limits.max
    except ValueError:
        limits = np.iinfo(dset_dtype)
        floor, ceiling = limits.min, limits.max

    iter_data = dsspec.get_iter_data(n_iter)
    if iter_data is None:
        return [(ceiling, floor) for _ in range(ndim)]

    extents = []
    for idim in range(ndim):
        column = iter_data[:, :, idim]
        extents.append((min(ceiling, column.min()), max(floor, column.max())))
    return extents
68
+
69
+
70
def _remote_bin_iter(iiter, n_iter, dsspec, wt_dsspec, initpoint, binbounds, ignore_out_of_range):
    """Build the normalized histogram for a single iteration.

    Args:
        iiter: position of this iteration in the output; returned unchanged.
        n_iter: iteration number handed to ``get_iter_data``.
        dsspec: object whose ``get_iter_data(n_iter)`` yields the data, indexed
            here as ``[:, ipt, :]``, or ``None`` when there is nothing to bin.
        wt_dsspec: object whose ``get_iter_data(n_iter)`` yields the weights,
            read here as ``[:, 0, 0]``.
        initpoint: index along axis 1 of the first point to histogram.
        binbounds: sequence of bin-boundary arrays, one per dimension.
        ignore_out_of_range: forwarded to ``histnd``.

    Returns:
        ``(iiter, n_iter, iter_hist)``.  ``iter_hist`` is all zeros (and is not
        normalized) when the iteration has no data.
    """
    iter_hist_shape = tuple(len(bounds) - 1 for bounds in binbounds)
    iter_hist = np.zeros(iter_hist_shape, dtype=np.float64)

    dset = dsspec.get_iter_data(n_iter)
    if dset is None:
        return iiter, n_iter, iter_hist

    npts = dset.shape[1]
    weights = wt_dsspec.get_iter_data(n_iter)[:, 0, 0]

    # Start at ``initpoint`` so that the *leading* points are skipped.  The
    # previous ``range(npts - initpoint)`` dropped the trailing points instead,
    # because the matching ``dset[:, initpoint:, :]`` slice had been disabled.
    for ipt in range(initpoint, npts):
        histnd(dset[:, ipt, :], binbounds, weights, out=iter_hist, binbound_check=False, ignore_out_of_range=ignore_out_of_range)

    del weights, dset

    # normalize histogram
    normhistnd(iter_hist, binbounds)
    return iiter, n_iter, iter_hist
90
+
91
+
92
class WEDDist(WESTParallelTool):
    """The ``w_eddist`` tool: per-iteration histograms of transition-event durations.

    Reads the ``durations`` dataset of a kinetics file, keeps the events going
    from ``--istate`` to ``--fstate``, and writes one normalized histogram per
    iteration (plus bin boundaries, midpoints and iteration numbers) to the
    output HDF5 file.
    """

    prog = 'w_eddist'
    description = '''\
Calculate time-resolved transition-event duration distribution from kinetics results


-----------------------------------------------------------------------------
Source data
-----------------------------------------------------------------------------

Source data is collected from the results of 'w_kinetics trace' (see w_kinetics trace --help for
more information on generating this dataset).


-----------------------------------------------------------------------------
Histogram binning
-----------------------------------------------------------------------------

By default, histograms are constructed with 100 bins in each dimension. This
can be overridden by specifying -b/--bins, which accepts a number of different
kinds of arguments:

  a single integer N
    N uniformly spaced bins will be used in each dimension.

  a sequence of integers N1,N2,... (comma-separated)
    N1 uniformly spaced bins will be used for the first dimension, N2 for the
    second, and so on.

  a list of lists [[B11, B12, B13, ...], [B21, B22, B23, ...], ...]
    The bin boundaries B11, B12, B13, ... will be used for the first dimension,
    B21, B22, B23, ... for the second dimension, and so on. These bin
    boundaries need not be uniformly spaced. These expressions will be
    evaluated with Python's ``eval`` construct, with ``np`` available for
    use [e.g. to specify bins using np.arange()].

The first two forms (integer, list of integers) will trigger a scan of all
data in each dimension in order to determine the minimum and maximum values,
which may be very expensive for large datasets. This can be avoided by
explicitly providing bin boundaries using the list-of-lists form.

Note that these bins are *NOT* at all related to the bins used to drive WE
sampling.


-----------------------------------------------------------------------------
Output format
-----------------------------------------------------------------------------

The output file produced (specified by -o/--output, defaulting to "eddist.h5")
may be fed to plothist to generate plots (or appropriately processed text or
HDF5 files) from this data. In short, the following datasets are created:

  ``histograms``
    Normalized histograms. The first axis corresponds to iteration, and
    remaining axes correspond to dimensions of the input dataset.

  ``/binbounds_0``
    Vector of bin boundaries for the first (index 0) dimension. Additional
    datasets similarly named (/binbounds_1, /binbounds_2, ...) are created
    for additional dimensions.

  ``/midpoints_0``
    Vector of bin midpoints for the first (index 0) dimension. Additional
    datasets similarly named are created for additional dimensions.

  ``n_iter``
    Vector of iteration numbers corresponding to the stored histograms (i.e.
    the first axis of the ``histograms`` dataset).


-----------------------------------------------------------------------------
Subsequent processing
-----------------------------------------------------------------------------

The output generated by this program (-o/--output, default "eddist.h5") may be
plotted by the ``plothist`` program. See ``plothist --help`` for more
information.


-----------------------------------------------------------------------------
Parallelization
-----------------------------------------------------------------------------

This tool supports parallelized binning, including reading of input data.
Parallel processing is the default. For simple cases (reading pre-computed
input data, modest numbers of segments), serial processing (--serial) may be
more efficient.


-----------------------------------------------------------------------------
Command-line options
-----------------------------------------------------------------------------

'''

    def __init__(self):
        super().__init__()

        # Parallel processing by default (this is not actually necessary, but it is
        # informative!)
        self.wm_env.default_work_manager = self.wm_env.default_parallel_work_manager

        # These are used throughout
        self.progress = ProgressIndicatorComponent()
        self.default_kinetics_file = 'kintrace.h5'
        self.kinetics_filename = None
        self.kinetics_file = None  # opened read-only in process_args()
        self.istate = None
        self.fstate = None
        self.duration = None  # slice of the 'durations' dataset, set in go()
        # Duration and weight dsspecs
        self.duration_dsspec = None
        self.wt_dsspec = None
        self.binspec = None
        self.output_filename = None
        self.output_file = None

        # These are used during histogram generation only
        self.iter_start = None
        self.iter_stop = None
        self.ndim = None
        self.dset_dtype = None
        self.binbounds = None  # bin boundaries for each dimension
        self.midpoints = None  # bin midpoints for each dimension
        self.data_range = None  # data range for each dimension, as the pairs (min,max)
        self.ignore_out_of_range = False
        self.compress_output = False

    def add_args(self, parser):
        """Register this tool's command-line options on ``parser``."""
        parser.add_argument(
            '-b',
            '--bins',
            dest='bins',
            metavar='BINEXPR',
            default='100',
            help='''Use BINEXPR for bins. This may be an integer, which will be used for each
            dimension of the progress coordinate; a list of integers (formatted as [n1,n2,...])
            which will use n1 bins for the first dimension, n2 for the second dimension, and so on;
            or a list of lists of boundaries (formatted as [[a1, a2, ...], [b1, b2, ...], ... ]), which
            will use [a1, a2, ...] as bin boundaries for the first dimension, [b1, b2, ...] as bin boundaries
            for the second dimension, and so on. (Default: 100 bins in each dimension.)''',
        )

        parser.add_argument(
            '-C',
            '--compress',
            action='store_true',
            help='''Compress histograms. May make storage of higher-dimensional histograms
            more tractable, at the (possible extreme) expense of increased analysis time.
            (Default: no compression.)''',
        )

        parser.add_argument(
            '--loose',
            dest='ignore_out_of_range',
            action='store_true',
            help='''Ignore values that do not fall within bins. (Risky, as this can make buggy bin
            boundaries appear as reasonable data. Only use if you are
            sure of your bin boundary specification.)''',
        )

        parser.add_argument('--istate', type=int, required=True, dest='istate', help='''Initial state defining transition event''')

        parser.add_argument('--fstate', type=int, required=True, dest='fstate', help='''Final state defining transition event''')

        itergroup = parser.add_argument_group('iteration range options')

        itergroup.add_argument(
            '--first-iter', default=1, dest='iter_start', type=int, help='''Iteration to begin analysis (default: 1)'''
        )

        itergroup.add_argument('--last-iter', dest='iter_stop', type=int, help='''Iteration to end analysis''')

        iogroup = parser.add_argument_group('input/output options')

        # self.default_kinetics_file is the instance attribute assigned in __init__
        iogroup.add_argument(
            '-k',
            '--kinetics',
            default=self.default_kinetics_file,
            help='''Populations and transition rates (including evolution) are stored in KINETICS
            (default: %(default)s).''',
        )
        iogroup.add_argument(
            '-o', '--output', dest='output', default='eddist.h5', help='''Store results in OUTPUT (default: %(default)s).'''
        )

        self.progress.add_args(parser)

    def process_args(self, args):
        """Copy parsed arguments onto the instance and open the kinetics file read-only."""
        self.progress.process_args(args)
        self.kinetics_filename = args.kinetics
        self.istate = args.istate
        self.fstate = args.fstate
        self.kinetics_file = h5io.WESTPAH5File(self.kinetics_filename, 'r')

        self.iter_start = args.iter_start
        if args.iter_stop is None:
            self.iter_stop = self.kinetics_file.attrs['iter_stop']
        else:
            # --last-iter is inclusive; iter_stop is used as an exclusive bound.
            self.iter_stop = args.iter_stop + 1

        self.binspec = args.bins
        self.output_filename = args.output
        self.ignore_out_of_range = bool(args.ignore_out_of_range)
        self.compress_output = args.compress or False

    def go(self):
        """Run the analysis: select events, build bins, histogram, and write the output file.

        Both HDF5 files are closed when this method returns, including when an
        error interrupts the analysis part-way through.
        """
        pi = self.progress.indicator
        pi.operation = 'Initializing'
        try:
            with pi:
                durations = self.kinetics_file['durations']
                self.duration = durations[self.iter_start - 1 : self.iter_stop - 1]

                # Only select transition events from specified istate to fstate
                mask = (self.duration['istate'] == self.istate) & (self.duration['fstate'] == self.fstate)

                self.duration_dsspec = DurationDataset(durations['duration'], mask, self.iter_start)
                self.wt_dsspec = DurationDataset(durations['weight'], mask, self.iter_start)

                self.output_file = h5py.File(self.output_filename, 'w')
                h5io.stamp_creator_data(self.output_file)

                # Construct bin boundaries
                self.construct_bins(self.parse_binspec(self.binspec))
                for idim, (binbounds, midpoints) in enumerate(zip(self.binbounds, self.midpoints)):
                    self.output_file['binbounds_{}'.format(idim)] = binbounds
                    self.output_file['midpoints_{}'.format(idim)] = midpoints

                # construct histogram
                self.construct_histogram()

                # Record iteration range
                iter_range = np.arange(self.iter_start, self.iter_stop, 1, dtype=(np.min_scalar_type(self.iter_stop)))
                self.output_file['n_iter'] = iter_range
                self.output_file['histograms'].attrs['iter_start'] = self.iter_start
                self.output_file['histograms'].attrs['iter_stop'] = self.iter_stop
        finally:
            # Release the file handles on success and on failure alike.
            if self.output_file is not None:
                self.output_file.close()
            if self.kinetics_file is not None:
                self.kinetics_file.close()

    @staticmethod
    def parse_binspec(binspec):
        """Evaluate the textual bin specification and return the resulting object.

        Raises:
            ValueError: if evaluating ``binspec`` raises; the original exception
                is attached as ``__cause__``.
        """
        namespace = {'numpy': np, 'np': np, 'inf': float('inf')}

        # NOTE(security): ``eval`` executes arbitrary Python.  This is the
        # documented behaviour of -b/--bins; never pass untrusted text here.
        try:
            binspec_compiled = eval(binspec, namespace)
        except Exception as e:
            raise ValueError('invalid bin specification: {!r}'.format(e)) from e

        log.debug('bin specs: %r', binspec_compiled)
        return binspec_compiled

    def construct_bins(self, bins):
        '''
        Construct bins according to ``bins``, which may be:

          1) A scalar integer (for that number of bins in each dimension)
          2) A sequence of integers (specifying number of bins for each dimension)
          3) A sequence of sequences of bin boundaries (specifying boundaries for each dimension)

        Sets ``self.binbounds`` to a list of arrays of bin boundaries appropriate for passing to
        fasthist.histnd, along with ``self.midpoints`` to the midpoints of the bins.

        Raises ``ValueError`` if ``bins`` is an empty sequence.
        '''

        if not isiterable(bins):
            self._construct_bins_from_scalar(bins)
        else:
            try:
                first = bins[0]
            except IndexError:
                raise ValueError('bin specification must not be empty') from None

            if not isiterable(first):
                self._construct_bins_from_int_seq(bins)
            else:
                self._construct_bins_from_bound_seqs(bins)

        log.debug('binbounds: %r', self.binbounds)

    def scan_data_shape(self):
        """Set ``self.ndim`` (always 1 here) and ``self.dset_dtype`` if not yet known."""
        if self.ndim is None:
            dset = self.duration_dsspec
            # Durations are scalar per event, so there is a single dimension.
            self.ndim = 1
            self.dset_dtype = dset.dtype

    def scan_data_range(self):
        '''Scan input data for range in each dimension, storing the result in
        ``self.data_range`` as one ``(min, max)`` pair per dimension.'''

        self.progress.indicator.new_operation('Scanning for data range', self.iter_stop - self.iter_start)
        self.scan_data_shape()

        dset_dtype = self.dset_dtype
        ndim = self.ndim
        dsspec = self.duration_dsspec

        # np.finfo raises ValueError for non-floating dtypes; use np.iinfo then.
        try:
            minval = np.finfo(dset_dtype).min
            maxval = np.finfo(dset_dtype).max
        except ValueError:
            minval = np.iinfo(dset_dtype).min
            maxval = np.iinfo(dset_dtype).max

        # Start from an inverted range so that any real datum narrows it.
        data_range = self.data_range = [(maxval, minval) for _i in range(self.ndim)]

        for future in self.work_manager.submit_as_completed(
            ((_remote_min_max, (ndim, dset_dtype, n_iter, dsspec), {}) for n_iter in range(self.iter_start, self.iter_stop)),
            self.max_queue_len,
        ):
            bounds = future.get_result(discard=True)
            for idim in range(ndim):
                current_min, current_max = data_range[idim]
                current_min = min(current_min, bounds[idim][0])
                current_max = max(current_max, bounds[idim][1])
                data_range[idim] = (current_min, current_max)
            self.progress.indicator.progress += 1

    @staticmethod
    def _pad_upper_bound(lb, ub):
        """Return a value just above ``ub`` so that the maximum falls inside the last bin."""
        if ub > 0:
            return ub * 1.01
        if ub < 0:
            # Scaling a negative number by 1.01 would move it *down*.
            return ub * 0.99
        # ub == 0: scaling cannot move it; pad by 1% of the span instead.
        span = ub - lb
        return 0.01 * span if span > 0 else 0.01

    def _construct_uniform_bins(self, counts):
        """Fill ``binbounds``/``midpoints`` with ``counts[idim]`` uniform bins per dimension."""
        if self.data_range is None:
            self.scan_data_range()
        self.scan_data_shape()

        self.binbounds = []
        self.midpoints = []
        for idim in range(self.ndim):
            lb, ub = self.data_range[idim]
            if lb > ub:
                # Still the inverted sentinel range: the scan saw no data at all.
                raise ValueError(
                    'no data found in dimension {} for the selected transition and iteration range; '
                    'cannot determine bin boundaries automatically'.format(idim)
                )
            # Advance just beyond the upper bound of the range, so that we catch
            # the maximum in the histogram
            ub = self._pad_upper_bound(lb, ub)

            boundset = np.linspace(lb, ub, counts[idim] + 1)
            self.binbounds.append(boundset)
            self.midpoints.append((boundset[:-1] + boundset[1:]) / 2.0)

    def _construct_bins_from_scalar(self, bins):
        """Use ``bins`` uniformly spaced bins in every dimension."""
        if self.data_range is None:
            self.scan_data_range()
        self.scan_data_shape()
        self._construct_uniform_bins([bins] * self.ndim)

    def _construct_bins_from_int_seq(self, bins):
        """Use ``bins[idim]`` uniformly spaced bins in dimension ``idim``."""
        self._construct_uniform_bins(bins)

    def _construct_bins_from_bound_seqs(self, bins):
        """Use explicit boundary sequences, one per dimension.

        Raises ``ValueError`` if any boundary set is not strictly increasing.
        """
        self.binbounds = []
        self.midpoints = []
        for boundset in bins:
            boundset = np.asarray(boundset)
            if (np.diff(boundset) <= 0).any():
                raise ValueError('boundary set {!r} is not strictly monotonically increasing'.format(boundset))
            self.binbounds.append(boundset)
            self.midpoints.append((boundset[:-1] + boundset[1:]) / 2.0)

    def construct_histogram(self):
        '''Construct a histogram using bins previously constructed with ``construct_bins()``.
        The time series of histogram values is stored in ``histograms``.
        Each histogram in the time series is normalized.'''

        self.scan_data_shape()

        iter_count = self.iter_stop - self.iter_start
        histograms_ds = self.output_file.create_dataset(
            'histograms',
            dtype=np.float64,
            shape=((iter_count,) + tuple(len(bounds) - 1 for bounds in self.binbounds)),
            compression=9 if self.compress_output else None,
        )
        binbounds = [np.require(boundset, self.dset_dtype, 'C') for boundset in self.binbounds]

        self.progress.indicator.new_operation('Constructing histograms', iter_count)
        task_gen = (
            (_remote_bin_iter, (iiter, n_iter, self.duration_dsspec, self.wt_dsspec, 0, binbounds, self.ignore_out_of_range), {})
            for (iiter, n_iter) in enumerate(range(self.iter_start, self.iter_stop))
        )

        log.debug('max queue length: %r', self.max_queue_len)
        for future in self.work_manager.submit_as_completed(task_gen, self.max_queue_len):
            iiter, n_iter, iter_hist = future.get_result(discard=True)
            self.progress.indicator.progress += 1

            # store histogram
            histograms_ds[iiter] = iter_hist
            del iter_hist, future
499
+
500
+
501
def entry_point():
    """Console entry point: instantiate the ``w_eddist`` tool and run its ``main()``."""
    tool = WEDDist()
    tool.main()


# Allow running this module directly as a script.
if __name__ == '__main__':
    entry_point()