westpa 2022.12__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of westpa might be problematic. Click here for more details.

Files changed (149)
  1. westpa/__init__.py +14 -0
  2. westpa/_version.py +21 -0
  3. westpa/analysis/__init__.py +5 -0
  4. westpa/analysis/core.py +746 -0
  5. westpa/analysis/statistics.py +27 -0
  6. westpa/analysis/trajectories.py +360 -0
  7. westpa/cli/__init__.py +0 -0
  8. westpa/cli/core/__init__.py +0 -0
  9. westpa/cli/core/w_fork.py +152 -0
  10. westpa/cli/core/w_init.py +230 -0
  11. westpa/cli/core/w_run.py +77 -0
  12. westpa/cli/core/w_states.py +212 -0
  13. westpa/cli/core/w_succ.py +99 -0
  14. westpa/cli/core/w_truncate.py +68 -0
  15. westpa/cli/tools/__init__.py +0 -0
  16. westpa/cli/tools/ploterr.py +506 -0
  17. westpa/cli/tools/plothist.py +706 -0
  18. westpa/cli/tools/w_assign.py +596 -0
  19. westpa/cli/tools/w_bins.py +166 -0
  20. westpa/cli/tools/w_crawl.py +119 -0
  21. westpa/cli/tools/w_direct.py +547 -0
  22. westpa/cli/tools/w_dumpsegs.py +94 -0
  23. westpa/cli/tools/w_eddist.py +506 -0
  24. westpa/cli/tools/w_fluxanl.py +376 -0
  25. westpa/cli/tools/w_ipa.py +833 -0
  26. westpa/cli/tools/w_kinavg.py +127 -0
  27. westpa/cli/tools/w_kinetics.py +96 -0
  28. westpa/cli/tools/w_multi_west.py +414 -0
  29. westpa/cli/tools/w_ntop.py +213 -0
  30. westpa/cli/tools/w_pdist.py +515 -0
  31. westpa/cli/tools/w_postanalysis_matrix.py +82 -0
  32. westpa/cli/tools/w_postanalysis_reweight.py +53 -0
  33. westpa/cli/tools/w_red.py +491 -0
  34. westpa/cli/tools/w_reweight.py +780 -0
  35. westpa/cli/tools/w_select.py +226 -0
  36. westpa/cli/tools/w_stateprobs.py +111 -0
  37. westpa/cli/tools/w_trace.py +599 -0
  38. westpa/core/__init__.py +0 -0
  39. westpa/core/_rc.py +673 -0
  40. westpa/core/binning/__init__.py +55 -0
  41. westpa/core/binning/_assign.cpython-313-x86_64-linux-gnu.so +0 -0
  42. westpa/core/binning/assign.py +455 -0
  43. westpa/core/binning/binless.py +96 -0
  44. westpa/core/binning/binless_driver.py +54 -0
  45. westpa/core/binning/binless_manager.py +190 -0
  46. westpa/core/binning/bins.py +47 -0
  47. westpa/core/binning/mab.py +506 -0
  48. westpa/core/binning/mab_driver.py +54 -0
  49. westpa/core/binning/mab_manager.py +198 -0
  50. westpa/core/data_manager.py +1694 -0
  51. westpa/core/extloader.py +74 -0
  52. westpa/core/h5io.py +995 -0
  53. westpa/core/kinetics/__init__.py +24 -0
  54. westpa/core/kinetics/_kinetics.cpython-313-x86_64-linux-gnu.so +0 -0
  55. westpa/core/kinetics/events.py +147 -0
  56. westpa/core/kinetics/matrates.py +156 -0
  57. westpa/core/kinetics/rate_averaging.py +266 -0
  58. westpa/core/progress.py +218 -0
  59. westpa/core/propagators/__init__.py +54 -0
  60. westpa/core/propagators/executable.py +719 -0
  61. westpa/core/reweight/__init__.py +14 -0
  62. westpa/core/reweight/_reweight.cpython-313-x86_64-linux-gnu.so +0 -0
  63. westpa/core/reweight/matrix.py +126 -0
  64. westpa/core/segment.py +119 -0
  65. westpa/core/sim_manager.py +835 -0
  66. westpa/core/states.py +359 -0
  67. westpa/core/systems.py +93 -0
  68. westpa/core/textio.py +74 -0
  69. westpa/core/trajectory.py +330 -0
  70. westpa/core/we_driver.py +910 -0
  71. westpa/core/wm_ops.py +43 -0
  72. westpa/core/yamlcfg.py +391 -0
  73. westpa/fasthist/__init__.py +34 -0
  74. westpa/fasthist/_fasthist.cpython-313-x86_64-linux-gnu.so +0 -0
  75. westpa/mclib/__init__.py +271 -0
  76. westpa/mclib/__main__.py +28 -0
  77. westpa/mclib/_mclib.cpython-313-x86_64-linux-gnu.so +0 -0
  78. westpa/oldtools/__init__.py +4 -0
  79. westpa/oldtools/aframe/__init__.py +35 -0
  80. westpa/oldtools/aframe/atool.py +75 -0
  81. westpa/oldtools/aframe/base_mixin.py +26 -0
  82. westpa/oldtools/aframe/binning.py +178 -0
  83. westpa/oldtools/aframe/data_reader.py +560 -0
  84. westpa/oldtools/aframe/iter_range.py +200 -0
  85. westpa/oldtools/aframe/kinetics.py +117 -0
  86. westpa/oldtools/aframe/mcbs.py +153 -0
  87. westpa/oldtools/aframe/output.py +39 -0
  88. westpa/oldtools/aframe/plotting.py +90 -0
  89. westpa/oldtools/aframe/trajwalker.py +126 -0
  90. westpa/oldtools/aframe/transitions.py +469 -0
  91. westpa/oldtools/cmds/__init__.py +0 -0
  92. westpa/oldtools/cmds/w_ttimes.py +361 -0
  93. westpa/oldtools/files.py +34 -0
  94. westpa/oldtools/miscfn.py +23 -0
  95. westpa/oldtools/stats/__init__.py +4 -0
  96. westpa/oldtools/stats/accumulator.py +35 -0
  97. westpa/oldtools/stats/edfs.py +129 -0
  98. westpa/oldtools/stats/mcbs.py +96 -0
  99. westpa/tools/__init__.py +33 -0
  100. westpa/tools/binning.py +472 -0
  101. westpa/tools/core.py +340 -0
  102. westpa/tools/data_reader.py +159 -0
  103. westpa/tools/dtypes.py +31 -0
  104. westpa/tools/iter_range.py +198 -0
  105. westpa/tools/kinetics_tool.py +340 -0
  106. westpa/tools/plot.py +283 -0
  107. westpa/tools/progress.py +17 -0
  108. westpa/tools/selected_segs.py +154 -0
  109. westpa/tools/wipi.py +751 -0
  110. westpa/trajtree/__init__.py +4 -0
  111. westpa/trajtree/_trajtree.cpython-313-x86_64-linux-gnu.so +0 -0
  112. westpa/trajtree/trajtree.py +117 -0
  113. westpa/westext/__init__.py +0 -0
  114. westpa/westext/adaptvoronoi/__init__.py +3 -0
  115. westpa/westext/adaptvoronoi/adaptVor_driver.py +214 -0
  116. westpa/westext/hamsm_restarting/__init__.py +3 -0
  117. westpa/westext/hamsm_restarting/example_overrides.py +35 -0
  118. westpa/westext/hamsm_restarting/restart_driver.py +1165 -0
  119. westpa/westext/stringmethod/__init__.py +11 -0
  120. westpa/westext/stringmethod/fourier_fitting.py +69 -0
  121. westpa/westext/stringmethod/string_driver.py +253 -0
  122. westpa/westext/stringmethod/string_method.py +306 -0
  123. westpa/westext/weed/BinCluster.py +180 -0
  124. westpa/westext/weed/ProbAdjustEquil.py +100 -0
  125. westpa/westext/weed/UncertMath.py +247 -0
  126. westpa/westext/weed/__init__.py +10 -0
  127. westpa/westext/weed/weed_driver.py +192 -0
  128. westpa/westext/wess/ProbAdjust.py +101 -0
  129. westpa/westext/wess/__init__.py +6 -0
  130. westpa/westext/wess/wess_driver.py +217 -0
  131. westpa/work_managers/__init__.py +57 -0
  132. westpa/work_managers/core.py +396 -0
  133. westpa/work_managers/environment.py +134 -0
  134. westpa/work_managers/mpi.py +318 -0
  135. westpa/work_managers/processes.py +187 -0
  136. westpa/work_managers/serial.py +28 -0
  137. westpa/work_managers/threads.py +79 -0
  138. westpa/work_managers/zeromq/__init__.py +20 -0
  139. westpa/work_managers/zeromq/core.py +641 -0
  140. westpa/work_managers/zeromq/node.py +131 -0
  141. westpa/work_managers/zeromq/work_manager.py +526 -0
  142. westpa/work_managers/zeromq/worker.py +320 -0
  143. westpa-2022.12.dist-info/AUTHORS +22 -0
  144. westpa-2022.12.dist-info/LICENSE +21 -0
  145. westpa-2022.12.dist-info/METADATA +193 -0
  146. westpa-2022.12.dist-info/RECORD +149 -0
  147. westpa-2022.12.dist-info/WHEEL +6 -0
  148. westpa-2022.12.dist-info/entry_points.txt +29 -0
  149. westpa-2022.12.dist-info/top_level.txt +1 -0
@@ -0,0 +1,361 @@
1
+ import argparse
2
+ import logging
3
+
4
+ import numpy as np
5
+ from numpy.random import Generator, MT19937
6
+
7
+ import westpa
8
+
9
+ from oldtools.aframe import (
10
+ WESTAnalysisTool,
11
+ BinningMixin,
12
+ WESTDataReaderMixin,
13
+ IterRangeMixin,
14
+ MCBSMixin,
15
+ TransitionAnalysisMixin,
16
+ KineticsAnalysisMixin,
17
+ CommonOutputMixin,
18
+ BFDataManager,
19
+ BFTransitionAnalysisMixin,
20
+ )
21
+
22
+ log = logging.getLogger('w_ttimes')
23
+
24
+
25
+ ciinfo_dtype = np.dtype([('expectation', np.float64), ('ci_lower', np.float64), ('ci_upper', np.float64)])
26
+
27
+
28
class WTTimesBase:
    '''Shared machinery for ``w_ttimes`` transition-kinetics analyses.

    Computes expectation values and Monte Carlo bootstrap confidence intervals
    for event durations, first passage times, fluxes, and rates between
    analysis bins, from previously generated transition data, then stores the
    results in the ``w_ttimes`` analysis HDF5 group and writes text summaries.

    Concrete subclasses mix this with analysis mixins that supply (among
    others) ``bf_mode``, ``dt``, ``n_bins``, ``mcbs_nsets``, ``mcbs_alpha``,
    ``trans_h5group``, ``analysis_initial_bins``, ``analysis_final_bins``,
    and the ``require_*``/``record_*`` helpers used below.
    '''

    def __init__(self):
        super().__init__()

        # Output filenames; filled in from command-line args by process_args()
        self.ed_stats_filename = None
        self.fpt_stats_filename = None
        self.flux_stats_filename = None
        self.rate_stats_filename = None
        self.suppress_headers = None
        self.print_bin_labels = None

        # HDF5 group where computed statistics are stored (set in main())
        self.ttimes_group = None

        # Cached statistics arrays of ciinfo_dtype; filled by gen_stats()
        self.durations = None
        self.fpts = None
        self.fluxes = None
        self.rates = None

        # RNG used for bootstrap resampling in gen_stats()
        self.rng = Generator(MT19937())

    def add_args(self, parser, upcall=True):
        '''Add arguments to a parser common to all analyses of this type.

        If ``upcall`` is true, chain to the next ``add_args`` in the MRO
        first (if any), so mixin options appear before the output options.
        '''
        if upcall:
            try:
                upfunc = super().add_args
            except AttributeError:
                pass
            else:
                upfunc(parser)

        output_options = parser.add_argument_group('kinetics analysis output options')
        output_options.add_argument(
            '--edstats',
            dest='ed_stats',
            default='edstats.txt',
            help='Store event duration statistics in ED_STATS (default: edstats.txt)',
        )
        # bf_mode is provided by the concrete subclass (brute force vs WE);
        # FPT statistics are only meaningful by default for brute-force data.
        if self.bf_mode:
            output_options.add_argument(
                '--fptstats',
                dest='fpt_stats',
                default='fptstats.txt',
                help='Store first passage time statistics in FPT_STATS (default: fptstats.txt).',
            )
        else:
            output_options.add_argument(
                '--fptstats', dest='fpt_stats', help='Store first passage time statistics in FPT_STATS (default: do not store).'
            )
        output_options.add_argument(
            '--fluxstats',
            dest='flux_stats',
            default='fluxstats.txt',
            help='Store flux statistics in FLUX_STATS (default: fluxstats.txt)',
        )
        output_options.add_argument(
            '--ratestats',
            dest='rate_stats',
            default='ratestats.txt',
            help='Store rate statistics in RATE_STATS (default: ratestats.txt)',
        )
        self.add_common_output_args(output_options)

    def process_args(self, args, upcall=True):
        '''Store parsed output options on the instance; chain to the next
        ``process_args`` in the MRO afterwards if ``upcall`` is true.'''
        self.ed_stats_filename = args.ed_stats
        self.fpt_stats_filename = args.fpt_stats
        self.flux_stats_filename = args.flux_stats
        self.rate_stats_filename = args.rate_stats
        self.process_common_output_args(args)
        # FPTs are only calculated when an output file was requested
        self.calc_fpts = bool(args.fpt_stats)

        if upcall:
            try:
                upfunc = super().process_args
            except AttributeError:
                pass
            else:
                upfunc(args)

    def gen_stats(self):
        '''Calculate duration/FPT/flux/rate expectations and Monte Carlo
        bootstrap confidence intervals for every (initial, final) bin pair,
        and store the results as datasets in ``self.ttimes_group``.'''
        self.require_transitions_group()
        westpa.rc.pstatus('Analyzing transition statistics...')

        dt = self.dt
        n_sets = self.mcbs_nsets
        lbi, ubi = self.calc_ci_bound_indices()

        total_time = self.get_total_time()
        transdat_ds = self.trans_h5group['transitions']
        transdat_ibin = transdat_ds['initial_bin']

        if not self.bf_mode:
            # WE data: restrict transitions to the selected iteration range
            transdat_niter = transdat_ds['n_iter']
            transdat_in_range = (transdat_niter >= self.first_iter) & (transdat_niter <= self.last_iter)

        durations = np.zeros((self.n_bins, self.n_bins), ciinfo_dtype)
        fpts = np.zeros((self.n_bins, self.n_bins), ciinfo_dtype)
        fluxes = np.zeros((self.n_bins, self.n_bins), ciinfo_dtype)
        rates = np.zeros((self.n_bins, self.n_bins), ciinfo_dtype)

        syn_avg_durations = np.empty((n_sets,), np.float64)
        syn_avg_fpts = np.empty((n_sets,), np.float64)
        syn_avg_fluxes = np.empty((n_sets,), np.float64)
        syn_avg_rates = np.empty((n_sets,), np.float64)

        # Field widths for the progress display
        w_n_bins = len(str(self.n_bins))
        w_n_sets = len(str(n_sets))

        for ibin in self.analysis_initial_bins:
            if self.bf_mode:
                trans_ibin = transdat_ds[transdat_ibin == ibin]
            else:
                trans_ibin = transdat_ds[(transdat_ibin == ibin) & transdat_in_range]

            for fbin in self.analysis_final_bins:
                # extract() avoids a second fancy-indexing pass over the structured array
                trans_ifbins = np.extract(trans_ibin['final_bin'] == fbin, trans_ibin)
                dlen = len(trans_ifbins)

                if not dlen:
                    continue

                trans_weights = trans_ifbins['final_weight']
                trans_durations = trans_ifbins['duration']
                trans_fpts = trans_ifbins['fpt']
                trans_ibinprobs = trans_ifbins['initial_bin_pop']

                durations[ibin, fbin]['expectation'] = np.average(trans_durations, weights=trans_weights) * dt
                fpts[ibin, fbin]['expectation'] = np.average(trans_fpts, weights=trans_weights) * dt
                avg_flux = trans_weights.sum() / total_time
                fluxes[ibin, fbin]['expectation'] = avg_flux
                rates[ibin, fbin]['expectation'] = avg_flux / trans_ibinprobs.mean()

                for iset in range(n_sets):
                    westpa.rc.pstatus(
                        '\r {:{w_n_bins}d}->{:<{w_n_bins}d} set {:{w_n_sets}d}/{:<{w_n_sets}d}, set size {:<20d}'.format(
                            ibin, fbin, iset + 1, n_sets, dlen, w_n_bins=w_n_bins, w_n_sets=w_n_sets
                        ),
                        end='',
                    )
                    westpa.rc.pflush()
                    # Bootstrap: resample the transitions with replacement
                    indices = self.rng.integers(dlen, size=(dlen,))
                    syn_weights = trans_weights.take(indices)
                    syn_durations = trans_durations.take(indices)
                    syn_fpts = trans_fpts.take(indices)
                    syn_ibinprobs = trans_ibinprobs.take(indices)

                    syn_avg_durations[iset] = np.average(syn_durations, weights=syn_weights) * dt
                    syn_avg_fpts[iset] = np.average(syn_fpts, weights=syn_weights) * dt
                    syn_avg_fluxes[iset] = syn_weights.sum() / total_time
                    syn_avg_rates[iset] = syn_avg_fluxes[iset] / syn_ibinprobs.mean()

                    del indices, syn_weights, syn_durations, syn_ibinprobs, syn_fpts

                # Order statistics of the synthetic averages give the CI bounds
                syn_avg_durations.sort()
                syn_avg_fpts.sort()
                syn_avg_fluxes.sort()
                syn_avg_rates.sort()

                durations[ibin, fbin]['ci_lower'] = syn_avg_durations[lbi]
                durations[ibin, fbin]['ci_upper'] = syn_avg_durations[ubi]

                fpts[ibin, fbin]['ci_lower'] = syn_avg_fpts[lbi]
                fpts[ibin, fbin]['ci_upper'] = syn_avg_fpts[ubi]

                fluxes[ibin, fbin]['ci_lower'] = syn_avg_fluxes[lbi]
                fluxes[ibin, fbin]['ci_upper'] = syn_avg_fluxes[ubi]

                rates[ibin, fbin]['ci_lower'] = syn_avg_rates[lbi]
                rates[ibin, fbin]['ci_upper'] = syn_avg_rates[ubi]

                del trans_weights, trans_durations, trans_ibinprobs, trans_ifbins, trans_fpts
            westpa.rc.pstatus()
            del trans_ibin

        for dsname, data in (('duration', durations), ('fpt', fpts), ('flux', fluxes), ('rate', rates)):
            try:
                del self.ttimes_group[dsname]
            except KeyError:
                pass

            ds = self.ttimes_group.create_dataset(dsname, data=data)
            attrs = ds.attrs
            attrs['dt'] = dt
            attrs['total_time'] = total_time
            attrs['ci_alpha'] = self.mcbs_alpha
            attrs['ci_n_sets'] = self.mcbs_nsets

            if not self.bf_mode:
                self.record_data_iter_range(ds)
                self.record_data_binhash(ds)

        # Annotate the containing group as well.
        # BUGFIX: previously ``attrs = self.ttimes_group.attrs`` was
        # immediately rebound to ``ds.attrs``, so the group-level attributes
        # were never written (the last dataset's were rewritten instead).
        attrs = self.ttimes_group.attrs
        attrs['dt'] = dt
        attrs['total_time'] = total_time
        attrs['ci_alpha'] = self.mcbs_alpha
        attrs['ci_n_sets'] = self.mcbs_nsets

        self.durations = durations
        # BUGFIX: fpts were computed but never cached on the instance,
        # forcing summarize_stats() to re-read them from the HDF5 group.
        self.fpts = fpts
        self.fluxes = fluxes
        self.rates = rates

        if not self.bf_mode:
            self.record_data_iter_range(self.ttimes_group)
            self.record_data_binhash(self.ttimes_group)

    def summarize_stats(self):
        '''Write text summaries of each statistic for which an output filename
        was configured, reading from the HDF5 group if the in-memory array is
        not available.'''
        for array, dsname, argname, title in (
            (self.durations, 'duration', 'ed_stats_filename', 'event duration'),
            (self.fpts, 'fpt', 'fpt_stats_filename', 'first passage time'),
            (self.fluxes, 'flux', 'flux_stats_filename', 'flux'),
            (self.rates, 'rate', 'rate_stats_filename', 'rate'),
        ):
            filename = getattr(self, argname)
            if filename:
                if array is None:
                    try:
                        array = self.ttimes_group[dsname]
                    except KeyError:
                        westpa.rc.pstatus('{} data not found in {}'.format(title, self.anal_h5name))
                        continue

                self.summarize_ci(
                    filename,
                    array,
                    title,
                    self.mcbs_display_confidence,
                    headers=(not self.suppress_headers),
                    labels=self.print_bin_labels,
                )

    def summarize_ci(self, filename, array, title, confidence, headers, labels):
        '''Write a text table of expectation values and confidence intervals
        for every (initial, final) bin pair in ``array`` to ``filename``.

        ``array`` is indexable as ``array[ibin, fbin][field]`` with fields
        ``expectation``/``ci_lower``/``ci_upper`` (ciinfo_dtype or an
        equivalent HDF5 dataset).
        '''
        format_2d = (
            '{ibin:{mw}d} {fbin:{mw}d} {0:20.15g} {1:20.15g} {2:20.15g} {3:20.15g} {4:20.15g} {5:20.15g}\n'
        )
        max_ibin_width = len(str(self.n_bins - 1))

        # BUGFIX: the output file was previously opened without ever being
        # closed; use a context manager so it is closed on all paths.
        with open(filename, 'wt') as outfile:
            if headers:
                # BUGFIX: the column legend previously omitted the average
                # (column 2), mislabeling every subsequent column.
                outfile.write(
                    '''\
# {title:} statistics
# confidence interval = {confidence}%
# ----
# column 0: initial bin index
# column 1: final bin index
# column 2: average value
# column 3: lower bound of confidence interval
# column 4: upper bound of confidence interval
# column 5: width of confidence interval
# column 6: relative width of confidence interval [abs(width/average)]
# column 7: symmetrized error [max(upper-average, average-lower)]
# ----
'''.format(
                        title=title, confidence=confidence
                    )
                )
                if labels:
                    self.write_bin_labels(outfile)
                    outfile.write('----\n')

            for ibin in self.analysis_initial_bins:
                for fbin in self.analysis_final_bins:
                    mean = array[ibin, fbin]['expectation']
                    lb = array[ibin, fbin]['ci_lower']
                    ub = array[ibin, fbin]['ci_upper']
                    ciwidth = ub - lb
                    relciwidth = abs(ciwidth / mean)
                    symmerr = max(mean - lb, ub - mean)

                    outfile.write(
                        format_2d.format(
                            *list(map(float, (mean, lb, ub, ciwidth, relciwidth, symmerr))), ibin=ibin, fbin=fbin, mw=max_ibin_width
                        )
                    )

    def main(self):
        '''Command-line entry point: parse arguments, require the needed
        analysis inputs, compute statistics, and write summaries.'''
        parser = argparse.ArgumentParser('w_ttimes', description=self.description)
        westpa.rc.add_args(parser)
        self.add_args(parser)

        args = parser.parse_args()
        westpa.rc.process_args(args, config_required=False)
        self.process_args(args)

        self.check_iter_range()
        self.check_bin_selection()
        self.open_analysis_backing()
        self.ttimes_group = self.require_analysis_group('w_ttimes', replace=False)
        self.require_bin_assignments()
        self.require_transitions()
        self.gen_stats()
        self.summarize_stats()
324
+
325
+
326
class WTTimesWE(
    WTTimesBase,
    CommonOutputMixin,
    MCBSMixin,
    KineticsAnalysisMixin,
    TransitionAnalysisMixin,
    BinningMixin,
    IterRangeMixin,
    WESTDataReaderMixin,
    WESTAnalysisTool,
):
    '''Transition kinetics analysis over weighted-ensemble (WE) simulation data.

    All behavior comes from WTTimesBase and the analysis mixins; the base-class
    order above fixes how the chained ``add_args``/``process_args`` calls
    resolve through the MRO.
    '''

    description = 'Trace the WEST trajectory tree and report on transition kinetics.'

    def __init__(self):
        # No state beyond what the base class and mixins establish.
        super().__init__()
341
+
342
+
343
class WTTimesBF(
    WTTimesBase, CommonOutputMixin, MCBSMixin, KineticsAnalysisMixin, BFTransitionAnalysisMixin, BFDataManager, WESTAnalysisTool
):
    '''Transition kinetics analysis over brute-force (conventional) trajectories.

    Unlike the WE variant, there is no iteration-range selection; trajectory
    data is accessed through the brute-force HDF5 file (BFDataManager).
    '''

    description = 'Trace one or more brute force trajectories and report on transition kinetics.'
    # Chunk size for reading trajectory data
    # (presumably consumed by BFTransitionAnalysisMixin -- defined outside this view)
    default_chunksize = 65536 * 4

    def __init__(self):
        super().__init__()
        self.bf_mode = True  # selects the brute-force code paths in WTTimesBase
        self.config_required = False  # no WEST run configuration needed for BF analysis
        self.usecols = None  # columns to read from input files; set elsewhere
        self.input_files = None  # brute-force trajectory inputs; set elsewhere

    def check_iter_range(self):
        # Brute-force data has no WE iterations, so there is no range to validate.
        pass  # do nothing, since we don't do iteration ranges for brute force

    def get_total_time(self):
        '''Return the total simulated time: the sum over all trajectories of
        (trajectory length - 1) transitions, multiplied by the timestep dt.'''
        self.require_bf_h5file()
        return np.add.reduce([self.get_traj_len(traj_id) - 1 for traj_id in range(self.get_n_trajs())]) * self.dt
@@ -0,0 +1,34 @@
1
+ import warnings
2
+ import numpy
3
+
4
+
5
def load_npy_or_text(filename):
    '''Load an array from an existing .npy file, or read a text file and
    convert to a NumPy array. In either case, return a NumPy array. If a
    pickled NumPy dataset is found, memory-map it read-only. If the specified
    file does not contain a pickled NumPy array, attempt to read the file using
    ``numpy.loadtxt(filename)``.'''

    # Probe whether the file is seekable; if not, assume a text stream.
    # BUGFIX: the probe handle is now always closed (it previously leaked on
    # the non-seekable path).
    with open(filename, 'rb') as f:
        try:
            f.seek(0)
        except IOError:
            # Not seekable - assume a text stream
            return numpy.loadtxt(filename)

    # File is seekable; try to memory-map it as a .npy file
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            return numpy.load(filename, 'r')
    except (IOError, ValueError) as e:
        # BUGFIX: modern NumPy raises ValueError (e.g. "Cannot load file
        # containing pickled data...") rather than IOError ("Failed to
        # interpret...") for non-.npy content; accept both and fall through.
        if 'Failed to interpret' in str(e) or 'Cannot load file' in str(e):
            pass
        else:
            raise

    return numpy.loadtxt(filename)
@@ -0,0 +1,23 @@
1
+ '''Miscellaneous support functions for WEST and WEST tools'''
2
+
3
+ import re
4
+
5
+
6
def parse_int_list(list_string):
    '''Parse a simple list consisting of integers or ranges of integers separated by commas.
    Ranges are specified as min:max and include the maximum value (unlike Python's ``range``).
    Duplicate values are ignored. Returns the result as a sorted list. Raises ValueError if
    the list cannot be parsed.'''

    try:
        collected = set()
        for token in re.split(r'\s*[;,]\s*', list_string):
            if ':' not in token:
                collected.add(int(token))
            else:
                low, high = (int(bound) for bound in re.split(r'\s*:\s*', token))
                collected.update(range(low, high + 1))
    except (ValueError, TypeError):
        raise ValueError('invalid integer range string {!r}'.format(list_string))
    else:
        return sorted(collected)
@@ -0,0 +1,4 @@
1
+ from . import accumulator # noqa
2
+ from .accumulator import RunningStatsAccumulator # noqa
3
+
4
+ from . import mcbs # noqa
@@ -0,0 +1,35 @@
1
+ import numpy
2
+
3
NAN = float('nan')


class RunningStatsAccumulator:
    '''Accumulate weighted running statistics (mean and standard deviation)
    over an array of independent cells.

    Each call to ``incorporate()`` folds one weighted observation into one
    cell; ``average()`` and ``std()`` then report per-cell statistics, with
    ``mask_value`` filling cells that never received any data.
    '''

    def __init__(self, shape, dtype=numpy.float64, count_dtype=numpy.uint, weight_dtype=numpy.float64, mask_value=NAN):
        self.sum = numpy.zeros(shape, dtype)  # per-cell sum of weight*value
        self.sqsum = numpy.zeros(shape, dtype)  # per-cell sum of weight*value**2
        self.weight = numpy.zeros(shape, weight_dtype)  # per-cell total weight
        self.count = numpy.zeros(shape, count_dtype)  # per-cell observation count
        self.mask_value = mask_value

    def incorporate(self, index, value, weight):
        '''Fold one observation ``value`` with weight ``weight`` into cell ``index``.'''
        weighted = weight * value
        self.count[index] += 1
        self.weight[index] += weight
        self.sum[index] += weighted
        self.sqsum[index] += weighted * value

    def average(self):
        '''Return per-cell weighted means; cells with no data get ``mask_value``.'''
        populated = self.count > 0
        result = numpy.empty_like(self.sum)
        result[~populated] = self.mask_value
        result[populated] = self.sum[populated] / self.weight[populated]
        return result

    # Alias retained for API compatibility
    mean = average

    def std(self):
        '''Return per-cell weighted standard deviations; cells with no data get ``mask_value``.'''
        populated = self.count > 0
        cell_means = self.average()[populated]
        result = numpy.empty_like(self.sqsum)
        result[~populated] = self.mask_value
        result[populated] = (self.sqsum[populated] / self.weight[populated] - cell_means * cell_means) ** 0.5
        return result
@@ -0,0 +1,129 @@
1
+ import numpy
2
+
3
+
4
class EDF:
    '''A class for creating and manipulating empirical distribution functions (cumulative
    distribution functions derived from sample data).
    '''

    @staticmethod
    def from_array(array):
        '''Reconstruct an EDF from an (N, 2) array of (x, F) pairs, as produced
        by ``as_array()``.'''
        edf = EDF(None, None)
        edf.x = array[:, 0]
        edf.F = array[:, 1]
        edf.dF = numpy.diff(edf.F)
        return edf

    @staticmethod
    def from_arrays(x, F):
        '''Reconstruct an EDF from separate abscissa (``x``) and CDF value (``F``) arrays.'''
        edf = EDF(None, None)
        edf.x = x
        edf.F = F
        edf.dF = numpy.diff(edf.F)
        return edf

    def __init__(self, values, weights=None):
        '''Construct a new EDF from the given values and (optionally) weights.

        ``values`` may be any array-like. ``weights`` may be None (equal
        weights), a scalar (the same weight for every sample), or a sequence
        of the same length as ``values``.
        '''

        if values is None:
            # Empty shell; populated by the from_array()/from_arrays() factories
            self.x = None
            self.F = None
            self.dF = None
            return

        # BUGFIX: coerce array-likes (e.g. plain Python lists) to ndarrays;
        # the fancy indexing below (values[sort_indices]) requires ndarray
        # semantics and previously raised TypeError for list input.
        values = numpy.asanyarray(values)

        if weights is None:
            weights = numpy.ones((len(values)), numpy.float64)
        elif numpy.isscalar(weights):
            tweights = numpy.empty((len(values)), numpy.float64)
            tweights[:] = weights
            weights = tweights
        else:
            weights = numpy.asanyarray(weights)
            if len(weights) != len(values):
                raise TypeError('values and weights have different lengths')

        # Sort values (stable keeps ties in input order, pairing weights correctly)
        sort_indices = numpy.argsort(values, kind='stable')
        values = values[sort_indices]
        weights = weights[sort_indices]

        # Determine unique abcissae; this is essentially stolen from numpy.lib.arraysetops.unique()
        x = values[numpy.concatenate(([True], values[1:] != values[:-1]))]
        F = numpy.empty((len(x),), numpy.float64)

        # ``values`` is arranged in increasing order, so we can walk along it and add up weights
        # as we go
        ival_last = 0
        ival = 0
        for ibin in range(0, len(x)):
            while ival < len(values) and values[ival] <= x[ibin]:
                ival += 1
            F[ibin] = weights[ival_last:ival].sum()
            ival_last = ival
        F = numpy.add.accumulate(F)
        # Normalize so the CDF ends at exactly 1
        F /= F[-1]

        self.x = x
        self.F = F
        self.dF = numpy.diff(F)

    def __len__(self):
        return len(self.x)

    def __call__(self, x):
        '''Evaluate this EDF at the given abcissae.'''
        indices = numpy.digitize(x, self.x)
        # Clamp out-of-range points to the last (rightmost) CDF value
        indices[indices >= len(self.x)] = len(self.x) - 1
        return self.F[indices]

    def as_array(self):
        '''Return this EDF as a (N,2) array, where N is the number of unique values passed to
        the constructor. Numpy type casting rules are applied (so, for instance, integral abcissae
        are converted to floating-point values).'''

        result = numpy.empty((len(self.F), 2), dtype=numpy.result_type(self.x, self.F))
        result[:, 0] = self.x
        result[:, 1] = self.F
        return result

    def quantiles(self, p):
        '''Treating the EDF as a quantile function, return the values of the (statistical) variable whose
        probabilities are at least p. That is, Q(p) = inf {x: p <= F(x) }.'''

        indices = numpy.searchsorted(self.F, p)
        indices[indices >= len(self.x)] = len(self.x) - 1
        return self.x[indices]

    def quantile(self, p):
        '''Return the single quantile Q(p); see ``quantiles()``.'''
        return self.quantiles([p])[0]

    def median(self):
        '''Return the median (0.5 quantile) of this distribution.'''
        return self.quantiles([0.5])[0]

    def moment(self, n):
        '''Calculate the nth moment of this probability distribution

        <x^n> = int_{-inf}^{inf} x^n dF(x)
        '''

        if n == 1:
            return (self.x[:-1] * self.dF).sum()
        else:
            return (self.x[:-1] ** n * self.dF).sum()

    def cmoment(self, n):
        '''Calculate the nth central moment of this probability distribution'''

        if n < 2:
            return 0
        return ((self.x[:-1] - self.moment(1)) ** n * self.dF).sum()

    def mean(self):
        '''Return the first moment of this probability distribution.'''
        return self.moment(1)

    def var(self):
        '''Return the second central moment of this probability distribution.'''
        return self.cmoment(2)

    def std(self):
        '''Return the standard deviation (root of the variance) of this probability distribution.'''
        return self.cmoment(2) ** 0.5
@@ -0,0 +1,96 @@
1
+ '''
2
+ Tools for Monte Carlo bootstrap error analysis
3
+ '''
4
+
5
+ import math
6
+
7
+ import numpy as np
8
+ from numpy.random import Generator, MT19937
9
+
10
+
11
def msort(input_array):
    '''Return a copy of *input_array* sorted along its first axis.'''
    sorted_copy = np.sort(input_array, axis=0)
    return sorted_copy
13
+
14
+
15
+ def add_mcbs_options(parser):
16
+ '''Add arguments concerning Monte Carlo bootstrap (``confidence`` and ``bssize``) to the given parser'''
17
+
18
+ group = parser.add_argument_group('bootstrapping options')
19
+ group.add_argument(
20
+ '--confidence',
21
+ dest='confidence',
22
+ type=float,
23
+ default=0.95,
24
+ help='Construct a confidence interval of width CONFIDENCE (default: 0.95=95%%)',
25
+ )
26
+ group.add_argument(
27
+ '--bssize',
28
+ dest='bssize',
29
+ type=int,
30
+ help='Use a bootstrap of BSSIZE samples to calculate error (default: chosen from confidence)',
31
+ )
32
+
33
+
34
+ def get_bssize(alpha):
35
+ '''Return a bootstrap data set size appropriate for the given confidence level'''
36
+ return int(10 ** (math.ceil(-math.log10(alpha)) + 1))
37
+
38
+
39
+ def bootstrap_ci(estimator, data, alpha, n_sets=None, args=(), kwargs={}, sort=msort, extended_output=False):
40
+ '''Perform a Monte Carlo bootstrap of a (1-alpha) confidence interval for the given ``estimator``.
41
+ Returns (fhat, ci_lower, ci_upper), where fhat is the result of ``estimator(data, *args, **kwargs)``,
42
+ and ``ci_lower`` and ``ci_upper`` are the lower and upper bounds of the surrounding confidence
43
+ interval, calculated by calling ``estimator(syndata, *args, **kwargs)`` on each synthetic data
44
+ set ``syndata``. If ``n_sets`` is provided, that is the number of synthetic data sets generated,
45
+ otherwise an appropriate size is selected automatically (see ``get_bssize()``).
46
+
47
+ ``sort``, if given, is applied to sort the results of calling ``estimator`` on each
48
+ synthetic data set prior to obtaining the confidence interval.
49
+
50
+ Individual entries in synthetic data sets are selected by the first index of ``data``, allowing this
51
+ function to be used on arrays of multidimensional data.
52
+
53
+ If ``extended_output`` is True (by default not), instead of returning (fhat, lb, ub), this function returns
54
+ (fhat, lb, ub, ub-lb, abs((ub-lb)/fhat), and max(ub-fhat,fhat-lb)) (that is, the estimated value, the
55
+ lower and upper bounds of the confidence interval, the width of the confidence interval, the relative
56
+ width of the confidence interval, and the symmetrized error bar of the confidence interval).'''
57
+
58
+ data = np.asanyarray(data)
59
+
60
+ fhat = estimator(data, *args, **kwargs)
61
+
62
+ try:
63
+ estimator_shape = fhat.shape
64
+ except AttributeError:
65
+ estimator_shape = ()
66
+
67
+ try:
68
+ estimator_dtype = fhat.dtype
69
+ except AttributeError:
70
+ estimator_dtype = type(fhat)
71
+
72
+ dlen = len(data)
73
+ n_sets = n_sets or get_bssize(alpha)
74
+
75
+ f_synth = np.empty((n_sets,) + estimator_shape, dtype=estimator_dtype)
76
+
77
+ rng = Generator(MT19937())
78
+
79
+ for i in range(0, n_sets):
80
+ indices = rng.integers(dlen, size=(dlen,))
81
+ f_synth[i] = estimator(data[indices], *args, **kwargs)
82
+
83
+ f_synth_sorted = sort(f_synth)
84
+ lbi = int(math.floor(n_sets * alpha / 2))
85
+ ubi = int(math.ceil(n_sets * (1 - alpha / 2)))
86
+ lb = f_synth_sorted[lbi]
87
+ ub = f_synth_sorted[ubi]
88
+
89
+ try:
90
+ if extended_output:
91
+ return (fhat, lb, ub, ub - lb, abs((ub - lb) / fhat) if fhat else 0, max(ub - fhat, fhat - lb))
92
+ else:
93
+ return (fhat, lb, ub)
94
+ finally:
95
+ # Do a little explicit memory management
96
+ del f_synth, f_synth_sorted