westpa 2022.10__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of westpa might be problematic; review the release details below before relying on it.

Files changed (150) hide show
  1. westpa/__init__.py +14 -0
  2. westpa/_version.py +21 -0
  3. westpa/analysis/__init__.py +5 -0
  4. westpa/analysis/core.py +746 -0
  5. westpa/analysis/statistics.py +27 -0
  6. westpa/analysis/trajectories.py +360 -0
  7. westpa/cli/__init__.py +0 -0
  8. westpa/cli/core/__init__.py +0 -0
  9. westpa/cli/core/w_fork.py +152 -0
  10. westpa/cli/core/w_init.py +230 -0
  11. westpa/cli/core/w_run.py +77 -0
  12. westpa/cli/core/w_states.py +212 -0
  13. westpa/cli/core/w_succ.py +99 -0
  14. westpa/cli/core/w_truncate.py +59 -0
  15. westpa/cli/tools/__init__.py +0 -0
  16. westpa/cli/tools/ploterr.py +506 -0
  17. westpa/cli/tools/plothist.py +706 -0
  18. westpa/cli/tools/w_assign.py +596 -0
  19. westpa/cli/tools/w_bins.py +166 -0
  20. westpa/cli/tools/w_crawl.py +119 -0
  21. westpa/cli/tools/w_direct.py +547 -0
  22. westpa/cli/tools/w_dumpsegs.py +94 -0
  23. westpa/cli/tools/w_eddist.py +506 -0
  24. westpa/cli/tools/w_fluxanl.py +378 -0
  25. westpa/cli/tools/w_ipa.py +833 -0
  26. westpa/cli/tools/w_kinavg.py +127 -0
  27. westpa/cli/tools/w_kinetics.py +96 -0
  28. westpa/cli/tools/w_multi_west.py +414 -0
  29. westpa/cli/tools/w_ntop.py +213 -0
  30. westpa/cli/tools/w_pdist.py +515 -0
  31. westpa/cli/tools/w_postanalysis_matrix.py +82 -0
  32. westpa/cli/tools/w_postanalysis_reweight.py +53 -0
  33. westpa/cli/tools/w_red.py +486 -0
  34. westpa/cli/tools/w_reweight.py +780 -0
  35. westpa/cli/tools/w_select.py +226 -0
  36. westpa/cli/tools/w_stateprobs.py +111 -0
  37. westpa/cli/tools/w_trace.py +599 -0
  38. westpa/core/__init__.py +0 -0
  39. westpa/core/_rc.py +673 -0
  40. westpa/core/binning/__init__.py +55 -0
  41. westpa/core/binning/_assign.cpython-312-darwin.so +0 -0
  42. westpa/core/binning/assign.py +449 -0
  43. westpa/core/binning/binless.py +96 -0
  44. westpa/core/binning/binless_driver.py +54 -0
  45. westpa/core/binning/binless_manager.py +190 -0
  46. westpa/core/binning/bins.py +47 -0
  47. westpa/core/binning/mab.py +427 -0
  48. westpa/core/binning/mab_driver.py +54 -0
  49. westpa/core/binning/mab_manager.py +198 -0
  50. westpa/core/data_manager.py +1694 -0
  51. westpa/core/extloader.py +74 -0
  52. westpa/core/h5io.py +995 -0
  53. westpa/core/kinetics/__init__.py +24 -0
  54. westpa/core/kinetics/_kinetics.cpython-312-darwin.so +0 -0
  55. westpa/core/kinetics/events.py +147 -0
  56. westpa/core/kinetics/matrates.py +156 -0
  57. westpa/core/kinetics/rate_averaging.py +266 -0
  58. westpa/core/progress.py +218 -0
  59. westpa/core/propagators/__init__.py +54 -0
  60. westpa/core/propagators/executable.py +715 -0
  61. westpa/core/reweight/__init__.py +14 -0
  62. westpa/core/reweight/_reweight.cpython-312-darwin.so +0 -0
  63. westpa/core/reweight/matrix.py +126 -0
  64. westpa/core/segment.py +119 -0
  65. westpa/core/sim_manager.py +830 -0
  66. westpa/core/states.py +359 -0
  67. westpa/core/systems.py +93 -0
  68. westpa/core/textio.py +74 -0
  69. westpa/core/trajectory.py +330 -0
  70. westpa/core/we_driver.py +908 -0
  71. westpa/core/wm_ops.py +43 -0
  72. westpa/core/yamlcfg.py +391 -0
  73. westpa/fasthist/__init__.py +34 -0
  74. westpa/fasthist/__main__.py +110 -0
  75. westpa/fasthist/_fasthist.cpython-312-darwin.so +0 -0
  76. westpa/mclib/__init__.py +264 -0
  77. westpa/mclib/__main__.py +28 -0
  78. westpa/mclib/_mclib.cpython-312-darwin.so +0 -0
  79. westpa/oldtools/__init__.py +4 -0
  80. westpa/oldtools/aframe/__init__.py +35 -0
  81. westpa/oldtools/aframe/atool.py +75 -0
  82. westpa/oldtools/aframe/base_mixin.py +26 -0
  83. westpa/oldtools/aframe/binning.py +178 -0
  84. westpa/oldtools/aframe/data_reader.py +560 -0
  85. westpa/oldtools/aframe/iter_range.py +200 -0
  86. westpa/oldtools/aframe/kinetics.py +117 -0
  87. westpa/oldtools/aframe/mcbs.py +146 -0
  88. westpa/oldtools/aframe/output.py +39 -0
  89. westpa/oldtools/aframe/plotting.py +90 -0
  90. westpa/oldtools/aframe/trajwalker.py +126 -0
  91. westpa/oldtools/aframe/transitions.py +469 -0
  92. westpa/oldtools/cmds/__init__.py +0 -0
  93. westpa/oldtools/cmds/w_ttimes.py +358 -0
  94. westpa/oldtools/files.py +34 -0
  95. westpa/oldtools/miscfn.py +23 -0
  96. westpa/oldtools/stats/__init__.py +4 -0
  97. westpa/oldtools/stats/accumulator.py +35 -0
  98. westpa/oldtools/stats/edfs.py +129 -0
  99. westpa/oldtools/stats/mcbs.py +89 -0
  100. westpa/tools/__init__.py +33 -0
  101. westpa/tools/binning.py +472 -0
  102. westpa/tools/core.py +340 -0
  103. westpa/tools/data_reader.py +159 -0
  104. westpa/tools/dtypes.py +31 -0
  105. westpa/tools/iter_range.py +198 -0
  106. westpa/tools/kinetics_tool.py +340 -0
  107. westpa/tools/plot.py +283 -0
  108. westpa/tools/progress.py +17 -0
  109. westpa/tools/selected_segs.py +154 -0
  110. westpa/tools/wipi.py +751 -0
  111. westpa/trajtree/__init__.py +4 -0
  112. westpa/trajtree/_trajtree.cpython-312-darwin.so +0 -0
  113. westpa/trajtree/trajtree.py +117 -0
  114. westpa/westext/__init__.py +0 -0
  115. westpa/westext/adaptvoronoi/__init__.py +3 -0
  116. westpa/westext/adaptvoronoi/adaptVor_driver.py +214 -0
  117. westpa/westext/hamsm_restarting/__init__.py +3 -0
  118. westpa/westext/hamsm_restarting/example_overrides.py +35 -0
  119. westpa/westext/hamsm_restarting/restart_driver.py +1165 -0
  120. westpa/westext/stringmethod/__init__.py +11 -0
  121. westpa/westext/stringmethod/fourier_fitting.py +69 -0
  122. westpa/westext/stringmethod/string_driver.py +253 -0
  123. westpa/westext/stringmethod/string_method.py +306 -0
  124. westpa/westext/weed/BinCluster.py +180 -0
  125. westpa/westext/weed/ProbAdjustEquil.py +100 -0
  126. westpa/westext/weed/UncertMath.py +247 -0
  127. westpa/westext/weed/__init__.py +10 -0
  128. westpa/westext/weed/weed_driver.py +182 -0
  129. westpa/westext/wess/ProbAdjust.py +101 -0
  130. westpa/westext/wess/__init__.py +6 -0
  131. westpa/westext/wess/wess_driver.py +207 -0
  132. westpa/work_managers/__init__.py +57 -0
  133. westpa/work_managers/core.py +396 -0
  134. westpa/work_managers/environment.py +134 -0
  135. westpa/work_managers/mpi.py +318 -0
  136. westpa/work_managers/processes.py +187 -0
  137. westpa/work_managers/serial.py +28 -0
  138. westpa/work_managers/threads.py +79 -0
  139. westpa/work_managers/zeromq/__init__.py +20 -0
  140. westpa/work_managers/zeromq/core.py +641 -0
  141. westpa/work_managers/zeromq/node.py +131 -0
  142. westpa/work_managers/zeromq/work_manager.py +526 -0
  143. westpa/work_managers/zeromq/worker.py +320 -0
  144. westpa-2022.10.dist-info/AUTHORS +22 -0
  145. westpa-2022.10.dist-info/LICENSE +21 -0
  146. westpa-2022.10.dist-info/METADATA +183 -0
  147. westpa-2022.10.dist-info/RECORD +150 -0
  148. westpa-2022.10.dist-info/WHEEL +5 -0
  149. westpa-2022.10.dist-info/entry_points.txt +29 -0
  150. westpa-2022.10.dist-info/top_level.txt +1 -0
@@ -0,0 +1,358 @@
1
+ import argparse
2
+ import logging
3
+
4
+ import numpy as np
5
+
6
+ import westpa
7
+
8
+ from oldtools.aframe import (
9
+ WESTAnalysisTool,
10
+ BinningMixin,
11
+ WESTDataReaderMixin,
12
+ IterRangeMixin,
13
+ MCBSMixin,
14
+ TransitionAnalysisMixin,
15
+ KineticsAnalysisMixin,
16
+ CommonOutputMixin,
17
+ BFDataManager,
18
+ BFTransitionAnalysisMixin,
19
+ )
20
+
21
+ log = logging.getLogger('w_ttimes')
22
+
23
+
24
+ ciinfo_dtype = np.dtype([('expectation', np.float64), ('ci_lower', np.float64), ('ci_upper', np.float64)])
25
+
26
+
27
class WTTimesBase:
    '''Shared machinery for transition-kinetics analysis (``w_ttimes``).

    Subclasses mix this with data-access and bootstrap mixins.  This class
    provides command-line argument handling, Monte Carlo bootstrap statistics
    over transition data, storage of results in the analysis HDF5 file, and
    plain-text summaries.

    NOTE(review): the concrete subclass (or a mixin) is expected to provide
    ``bf_mode``, ``dt``, ``n_bins``, ``mcbs_nsets``/``mcbs_alpha``,
    ``analysis_initial_bins``/``analysis_final_bins``, ``trans_h5group``, and
    the various ``require_*``/``record_*``/``get_total_time`` methods used
    below -- confirm against the mixin classes.
    '''

    def __init__(self):
        super().__init__()

        # Output filenames (populated from command-line arguments)
        self.ed_stats_filename = None
        self.fpt_stats_filename = None
        self.flux_stats_filename = None
        self.rate_stats_filename = None
        self.suppress_headers = None
        self.print_bin_labels = None

        # Whether first-passage-time statistics are calculated (set in process_args)
        self.calc_fpts = None

        # HDF5 group in which results are stored
        self.ttimes_group = None

        # Result arrays: (n_bins, n_bins) records of ciinfo_dtype
        # (expectation / ci_lower / ci_upper)
        self.durations = None
        self.fpts = None
        self.fluxes = None
        self.rates = None

    def add_args(self, parser, upcall=True):
        '''Add arguments to a parser common to all analyses of this type.'''
        if upcall:
            try:
                upfunc = super().add_args
            except AttributeError:
                pass
            else:
                upfunc(parser)

        output_options = parser.add_argument_group('kinetics analysis output options')
        output_options.add_argument(
            '--edstats',
            dest='ed_stats',
            default='edstats.txt',
            help='Store event duration statistics in ED_STATS (default: edstats.txt)',
        )
        # First-passage times are only stored by default in brute-force mode
        if self.bf_mode:
            output_options.add_argument(
                '--fptstats',
                dest='fpt_stats',
                default='fptstats.txt',
                help='Store first passage time statistics in FPT_STATS (default: fptstats.txt).',
            )
        else:
            output_options.add_argument(
                '--fptstats', dest='fpt_stats', help='Store first passage time statistics in FPT_STATS (default: do not store).'
            )
        output_options.add_argument(
            '--fluxstats',
            dest='flux_stats',
            default='fluxstats.txt',
            help='Store flux statistics in FLUX_STATS (default: fluxstats.txt)',
        )
        output_options.add_argument(
            '--ratestats',
            dest='rate_stats',
            default='ratestats.txt',
            help='Store rate statistics in RATE_STATS (default: ratestats.txt)',
        )
        self.add_common_output_args(output_options)

    def process_args(self, args, upcall=True):
        '''Store parsed command-line arguments on this instance, then delegate upward.'''
        self.ed_stats_filename = args.ed_stats
        self.fpt_stats_filename = args.fpt_stats
        self.flux_stats_filename = args.flux_stats
        self.rate_stats_filename = args.rate_stats
        self.process_common_output_args(args)
        # FPTs are only calculated when an output file was requested
        self.calc_fpts = bool(args.fpt_stats)

        if upcall:
            try:
                upfunc = super().process_args
            except AttributeError:
                pass
            else:
                upfunc(args)

    def gen_stats(self):
        '''Calculate expectation values and Monte Carlo bootstrap confidence
        intervals for event durations, first passage times, fluxes, and rates
        for every (initial bin, final bin) pair, and store the results both on
        this instance and in the analysis HDF5 file.'''
        self.require_transitions_group()
        westpa.rc.pstatus('Analyzing transition statistics...')

        dt = self.dt
        n_sets = self.mcbs_nsets
        lbi, ubi = self.calc_ci_bound_indices()

        total_time = self.get_total_time()
        transdat_ds = self.trans_h5group['transitions']
        transdat_ibin = transdat_ds['initial_bin']

        # In WE mode, restrict transitions to the selected iteration range
        if not self.bf_mode:
            transdat_niter = transdat_ds['n_iter']
            transdat_in_range = (transdat_niter >= self.first_iter) & (transdat_niter <= self.last_iter)

        durations = np.zeros((self.n_bins, self.n_bins), ciinfo_dtype)
        fpts = np.zeros((self.n_bins, self.n_bins), ciinfo_dtype)
        fluxes = np.zeros((self.n_bins, self.n_bins), ciinfo_dtype)
        rates = np.zeros((self.n_bins, self.n_bins), ciinfo_dtype)

        # Per-bootstrap-set averages, sorted to extract CI bounds
        syn_avg_durations = np.empty((n_sets,), np.float64)
        syn_avg_fpts = np.empty((n_sets,), np.float64)
        syn_avg_fluxes = np.empty((n_sets,), np.float64)
        syn_avg_rates = np.empty((n_sets,), np.float64)

        # Field widths for progress output
        w_n_bins = len(str(self.n_bins))
        w_n_sets = len(str(n_sets))

        for ibin in self.analysis_initial_bins:
            if self.bf_mode:
                trans_ibin = transdat_ds[transdat_ibin == ibin]
            else:
                trans_ibin = transdat_ds[(transdat_ibin == ibin) & transdat_in_range]

            for fbin in self.analysis_final_bins:
                trans_ifbins = np.extract(trans_ibin['final_bin'] == fbin, trans_ibin)
                dlen = len(trans_ifbins)

                if not dlen:
                    continue

                trans_weights = trans_ifbins['final_weight']
                trans_durations = trans_ifbins['duration']
                trans_fpts = trans_ifbins['fpt']
                trans_ibinprobs = trans_ifbins['initial_bin_pop']

                # Weighted expectation values; times are converted to physical units via dt
                durations[ibin, fbin]['expectation'] = np.average(trans_durations, weights=trans_weights) * dt
                fpts[ibin, fbin]['expectation'] = np.average(trans_fpts, weights=trans_weights) * dt
                avg_flux = trans_weights.sum() / total_time
                fluxes[ibin, fbin]['expectation'] = avg_flux
                rates[ibin, fbin]['expectation'] = avg_flux / trans_ibinprobs.mean()

                # Monte Carlo bootstrap: resample transitions with replacement
                for iset in range(n_sets):
                    westpa.rc.pstatus(
                        '\r {:{w_n_bins}d}->{:<{w_n_bins}d} set {:{w_n_sets}d}/{:<{w_n_sets}d}, set size {:<20d}'.format(
                            ibin, fbin, iset + 1, n_sets, dlen, w_n_bins=w_n_bins, w_n_sets=w_n_sets
                        ),
                        end='',
                    )
                    westpa.rc.pflush()
                    indices = np.random.randint(dlen, size=(dlen,))
                    syn_weights = trans_weights.take(indices)
                    syn_durations = trans_durations.take(indices)
                    syn_fpts = trans_fpts.take(indices)
                    syn_ibinprobs = trans_ibinprobs.take(indices)

                    syn_avg_durations[iset] = np.average(syn_durations, weights=syn_weights) * dt
                    syn_avg_fpts[iset] = np.average(syn_fpts, weights=syn_weights) * dt
                    syn_avg_fluxes[iset] = syn_weights.sum() / total_time
                    syn_avg_rates[iset] = syn_avg_fluxes[iset] / syn_ibinprobs.mean()

                    del indices, syn_weights, syn_durations, syn_ibinprobs, syn_fpts

                # Order-statistic confidence bounds from the sorted bootstrap averages
                syn_avg_durations.sort()
                syn_avg_fpts.sort()
                syn_avg_fluxes.sort()
                syn_avg_rates.sort()

                durations[ibin, fbin]['ci_lower'] = syn_avg_durations[lbi]
                durations[ibin, fbin]['ci_upper'] = syn_avg_durations[ubi]

                fpts[ibin, fbin]['ci_lower'] = syn_avg_fpts[lbi]
                fpts[ibin, fbin]['ci_upper'] = syn_avg_fpts[ubi]

                fluxes[ibin, fbin]['ci_lower'] = syn_avg_fluxes[lbi]
                fluxes[ibin, fbin]['ci_upper'] = syn_avg_fluxes[ubi]

                rates[ibin, fbin]['ci_lower'] = syn_avg_rates[lbi]
                rates[ibin, fbin]['ci_upper'] = syn_avg_rates[ubi]

                del trans_weights, trans_durations, trans_ibinprobs, trans_ifbins, trans_fpts
            westpa.rc.pstatus()
            del trans_ibin

        # Store (or replace) one dataset per observable in the analysis file
        for dsname, data in (('duration', durations), ('fpt', fpts), ('flux', fluxes), ('rate', rates)):
            try:
                del self.ttimes_group[dsname]
            except KeyError:
                pass

            ds = self.ttimes_group.create_dataset(dsname, data=data)
            attrs = ds.attrs
            attrs['dt'] = dt
            attrs['total_time'] = total_time
            attrs['ci_alpha'] = self.mcbs_alpha
            attrs['ci_n_sets'] = self.mcbs_nsets

            if not self.bf_mode:
                self.record_data_iter_range(ds)
                self.record_data_binhash(ds)

        # Group-level attributes (previously clobbered by a stray ``attrs = ds.attrs``,
        # which rewrote the last dataset's attributes instead of the group's)
        attrs = self.ttimes_group.attrs
        attrs['dt'] = dt
        attrs['total_time'] = total_time
        attrs['ci_alpha'] = self.mcbs_alpha
        attrs['ci_n_sets'] = self.mcbs_nsets

        self.durations = durations
        # fpts was previously never stored, leaving summarize_stats to fall back
        # to (possibly stale) data in the HDF5 file
        self.fpts = fpts
        self.fluxes = fluxes
        self.rates = rates

        if not self.bf_mode:
            self.record_data_iter_range(self.ttimes_group)
            self.record_data_binhash(self.ttimes_group)

    def summarize_stats(self):
        '''Write text summaries of each requested observable, preferring the
        in-memory arrays produced by gen_stats() and falling back to data
        stored in the analysis HDF5 file.'''
        for array, dsname, argname, title in (
            (self.durations, 'duration', 'ed_stats_filename', 'event duration'),
            (self.fpts, 'fpt', 'fpt_stats_filename', 'first passage time'),
            (self.fluxes, 'flux', 'flux_stats_filename', 'flux'),
            (self.rates, 'rate', 'rate_stats_filename', 'rate'),
        ):
            filename = getattr(self, argname)
            if filename:
                if array is None:
                    try:
                        array = self.ttimes_group[dsname]
                    except KeyError:
                        westpa.rc.pstatus('{} data not found in {}'.format(title, self.anal_h5name))
                        continue

                self.summarize_ci(
                    filename,
                    array,
                    title,
                    self.mcbs_display_confidence,
                    headers=(not self.suppress_headers),
                    labels=self.print_bin_labels,
                )

    def summarize_ci(self, filename, array, title, confidence, headers, labels):
        '''Write a text table of expectation values and confidence intervals,
        one row per (initial bin, final bin) pair.'''
        format_2d = (
            '{ibin:{mw}d} {fbin:{mw}d} {0:20.15g} {1:20.15g} {2:20.15g} {3:20.15g} {4:20.15g} {5:20.15g}\n'
        )
        max_ibin_width = len(str(self.n_bins - 1))

        # Context manager guarantees the file is closed (was previously leaked)
        with open(filename, 'wt') as outfile:
            if headers:
                # Header now documents all eight columns; the average column was
                # previously missing, shifting every description off by one
                outfile.write(
                    '''\
# {title:} statistics
# confidence interval = {confidence}%
# ----
# column 0: initial bin index
# column 1: final bin index
# column 2: average value
# column 3: lower bound of confidence interval
# column 4: upper bound of confidence interval
# column 5: width of confidence interval
# column 6: relative width of confidence interval [abs(width/average)]
# column 7: symmetrized error [max(upper-average, average-lower)]
# ----
'''.format(
                        title=title, confidence=confidence
                    )
                )
                if labels:
                    self.write_bin_labels(outfile)
                    outfile.write('----\n')

            for ibin in self.analysis_initial_bins:
                for fbin in self.analysis_final_bins:
                    mean = array[ibin, fbin]['expectation']
                    lb = array[ibin, fbin]['ci_lower']
                    ub = array[ibin, fbin]['ci_upper']
                    ciwidth = ub - lb
                    relciwidth = abs(ciwidth / mean)
                    symmerr = max(mean - lb, ub - mean)

                    outfile.write(
                        format_2d.format(
                            *list(map(float, (mean, lb, ub, ciwidth, relciwidth, symmerr))), ibin=ibin, fbin=fbin, mw=max_ibin_width
                        )
                    )

    def main(self):
        '''Command-line entry point: parse arguments, then run the full analysis.'''
        parser = argparse.ArgumentParser('w_ttimes', description=self.description)
        westpa.rc.add_args(parser)
        self.add_args(parser)

        args = parser.parse_args()
        westpa.rc.process_args(args, config_required=False)
        self.process_args(args)

        self.check_iter_range()
        self.check_bin_selection()
        self.open_analysis_backing()
        self.ttimes_group = self.require_analysis_group('w_ttimes', replace=False)
        self.require_bin_assignments()
        self.require_transitions()
        self.gen_stats()
        self.summarize_stats()
323
class WTTimesWE(
    WTTimesBase,
    CommonOutputMixin,
    MCBSMixin,
    KineticsAnalysisMixin,
    TransitionAnalysisMixin,
    BinningMixin,
    IterRangeMixin,
    WESTDataReaderMixin,
    WESTAnalysisTool,
):
    '''w_ttimes analysis operating on weighted-ensemble simulation data.

    All behavior comes from WTTimesBase plus the data-reader/binning/bootstrap
    mixins; this class only fixes the mixin composition and the description.

    NOTE(review): WTTimesBase.add_args reads ``self.bf_mode``, which this class
    never sets -- presumably one of the mixins/base classes defines it as False;
    confirm against WESTAnalysisTool and the aframe mixins.
    '''

    description = 'Trace the WEST trajectory tree and report on transition kinetics.'

    def __init__(self):
        # No state of its own; cooperative __init__ walks the mixin MRO
        super().__init__()
339
+
340
class WTTimesBF(
    WTTimesBase, CommonOutputMixin, MCBSMixin, KineticsAnalysisMixin, BFTransitionAnalysisMixin, BFDataManager, WESTAnalysisTool
):
    '''w_ttimes analysis operating on brute-force (conventional) trajectories
    managed by BFDataManager, rather than weighted-ensemble data.'''

    description = 'Trace one or more brute force trajectories and report on transition kinetics.'
    # Chunk size used when reading brute-force trajectory data
    default_chunksize = 65536 * 4

    def __init__(self):
        super().__init__()
        # Brute-force mode: WTTimesBase.add_args/gen_stats branch on this flag
        self.bf_mode = True
        self.config_required = False
        # Column selection and input file list; populated by argument handling
        # elsewhere (presumably in a BF mixin -- not visible here)
        self.usecols = None
        self.input_files = None

    def check_iter_range(self):
        # Overrides the WE-mode check from a mixin; see WTTimesBase.main()
        pass  # do nothing, since we don't do iteration ranges for brute force

    def get_total_time(self):
        '''Return the total simulated time across all brute-force trajectories:
        sum over trajectories of (length - 1) steps, scaled by the timestep dt.'''
        self.require_bf_h5file()
        return np.add.reduce([self.get_traj_len(traj_id) - 1 for traj_id in range(self.get_n_trajs())]) * self.dt
@@ -0,0 +1,34 @@
1
+ import warnings
2
+ import numpy
3
+
4
+
5
def load_npy_or_text(filename):
    '''Load an array from an existing .npy file, or read a text file and
    convert to a NumPy array. In either case, return a NumPy array. If a
    pickled NumPy dataset is found, memory-map it read-only. If the specified
    file does not contain a pickled NumPy array, attempt to read the file using
    numpy.loadtxt(filename).'''

    # Probe for seekability; the context manager guarantees the probe handle is
    # closed on every path (it was previously leaked on the text-stream branch)
    with open(filename, 'rb') as f:
        try:
            f.seek(0)
        except IOError:
            # Not seekable - assume a text stream
            return numpy.loadtxt(filename)

    # File is seekable: try to memory-map it as a .npy file
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            return numpy.load(filename, 'r')
    except (IOError, ValueError) as e:
        # Old NumPy raised IOError('Failed to interpret...') for non-.npy
        # content; modern NumPy raises ValueError (either 'Failed to
        # interpret...' or the allow_pickle refusal). Fall back to text
        # parsing for those; re-raise anything else.
        msg = str(e)
        if 'Failed to interpret' in msg or 'allow_pickle' in msg:
            pass
        else:
            raise

    return numpy.loadtxt(filename)
@@ -0,0 +1,23 @@
1
+ '''Miscellaneous support functions for WEST and WEST tools'''
2
+
3
+ import re
4
+
5
+
6
def parse_int_list(list_string):
    '''Parse a simple list consisting of integers or ranges of integers separated by commas. Ranges are specified
    as min:max, and include the maximum value (unlike Python's ``range``). Duplicate values are ignored.
    Returns the result as a sorted list. Raises ValueError if the list cannot be parsed.'''

    collected = set()
    try:
        # Fields may be separated by commas or semicolons, with optional whitespace
        for token in re.split(r'\s*[;,]\s*', list_string):
            if ':' not in token:
                collected.add(int(token))
            else:
                # A range field must have exactly two endpoints; a bad count
                # surfaces as ValueError from the unpacking below
                low, high = (int(part) for part in re.split(r'\s*:\s*', token))
                collected.update(range(low, high + 1))
    except (ValueError, TypeError):
        raise ValueError('invalid integer range string {!r}'.format(list_string))
    return sorted(collected)
@@ -0,0 +1,4 @@
1
+ from . import accumulator # noqa
2
+ from .accumulator import RunningStatsAccumulator # noqa
3
+
4
+ from . import mcbs # noqa
@@ -0,0 +1,35 @@
1
+ import numpy
2
+
3
# Sentinel stored wherever no samples have been accumulated
NAN = float('nan')


class RunningStatsAccumulator:
    '''Accumulate weighted samples into per-index running sums, from which
    weighted means and standard deviations can be computed on demand.
    Indices with no accumulated samples report ``mask_value`` (NaN by default).'''

    def __init__(self, shape, dtype=numpy.float64, count_dtype=numpy.uint, weight_dtype=numpy.float64, mask_value=NAN):
        # Running totals, indexed exactly as samples are incorporated
        self.sum = numpy.zeros(shape, dtype)
        self.sqsum = numpy.zeros(shape, dtype)
        self.weight = numpy.zeros(shape, weight_dtype)
        self.count = numpy.zeros(shape, count_dtype)
        self.mask_value = mask_value

    def incorporate(self, index, value, weight):
        '''Fold one weighted sample into the running totals at ``index``.'''
        weighted_value = weight * value
        self.count[index] += 1
        self.weight[index] += weight
        self.sum[index] += weighted_value
        self.sqsum[index] += weighted_value * value

    def average(self):
        '''Return the weighted mean per index; mask_value where count == 0.'''
        has_data = self.count > 0
        result = numpy.empty_like(self.sum)
        result[has_data] = self.sum[has_data] / self.weight[has_data]
        result[~has_data] = self.mask_value
        return result

    mean = average

    def std(self):
        '''Return the weighted standard deviation per index (sqrt of
        E[x^2] - E[x]^2); mask_value where count == 0.'''
        has_data = self.count > 0
        mean_vals = self.average()[has_data]
        result = numpy.empty_like(self.sqsum)
        result[has_data] = (self.sqsum[has_data] / self.weight[has_data] - mean_vals * mean_vals) ** 0.5
        result[~has_data] = self.mask_value
        return result
@@ -0,0 +1,129 @@
1
+ import numpy
2
+
3
+
4
class EDF:
    '''A class for creating and manipulating empirical distribution functions (cumulative
    distribution functions derived from sample data).
    '''

    @staticmethod
    def from_array(array):
        '''Construct an EDF from an (N, 2) array of (x, F(x)) pairs.'''
        edf = EDF(None, None)
        edf.x = array[:, 0]
        edf.F = array[:, 1]
        edf.dF = numpy.diff(edf.F)
        return edf

    @staticmethod
    def from_arrays(x, F):
        '''Construct an EDF from parallel arrays of abcissae and CDF values.'''
        edf = EDF(None, None)
        edf.x = x
        edf.F = F
        edf.dF = numpy.diff(edf.F)
        return edf

    def __init__(self, values, weights=None):
        '''Construct a new EDF from the given values and (optionally) weights.'''

        if values is None:
            self.x = None
            self.F = None
            self.dF = None
            return

        if weights is None:
            weights = numpy.ones((len(values)), numpy.float64)
        elif numpy.isscalar(weights):
            tweights = numpy.empty((len(values)), numpy.float64)
            tweights[:] = weights
            weights = tweights
        else:
            if len(weights) != len(values):
                raise TypeError('values and weights have different lengths')

        # Sort values (weights travel with them)
        sort_indices = numpy.argsort(values)
        values = values[sort_indices]
        weights = weights[sort_indices]

        # Determine unique abcissae; this is essentially stolen from numpy.lib.arraysetops.unique()
        x = values[numpy.concatenate(([True], values[1:] != values[:-1]))]
        F = numpy.empty((len(x),), numpy.float64)

        # ``values`` is arranged in increasing order, so we can walk along it and add up weights
        # as we go
        ival_last = 0
        ival = 0
        for ibin in range(0, len(x)):
            while ival < len(values) and values[ival] <= x[ibin]:
                ival += 1
            F[ibin] = weights[ival_last:ival].sum()
            ival_last = ival
        F = numpy.add.accumulate(F)
        F /= F[-1]

        self.x = x
        self.F = F
        self.dF = numpy.diff(F)

    def _jumps(self):
        '''Return the probability mass at each abscissa: the jump of F at each x,
        including the mass at the first point (which is F[0] itself).'''
        return numpy.concatenate(([self.F[0]], self.dF))

    def __len__(self):
        return len(self.x)

    def __call__(self, x):
        '''Evaluate this EDF at the given abcissae: F(t) = P(X <= t).'''
        # Index of the largest abscissa <= each query point; -1 means "below all data",
        # where the EDF is 0.  (The previous digitize-based lookup was off by one and
        # returned F[0] > 0 below the smallest abscissa.)
        indices = numpy.searchsorted(self.x, x, side='right') - 1
        below = indices < 0
        indices[below] = 0
        result = self.F[indices]  # fancy indexing returns a copy; safe to modify
        result[below] = 0.0
        return result

    def as_array(self):
        '''Return this EDF as a (N,2) array, where N is the number of unique values passed to
        the constructor. Numpy type casting rules are applied (so, for instance, integral abcissae
        are converted to floating-point values).'''

        result = numpy.empty((len(self.F), 2), dtype=numpy.result_type(self.x, self.F))
        result[:, 0] = self.x
        result[:, 1] = self.F
        return result

    def quantiles(self, p):
        '''Treating the EDF as a quantile function, return the values of the (statistical) variable whose
        probabilities are at least p. That is, Q(p) = inf {x: p <= F(x) }.'''

        indices = numpy.searchsorted(self.F, p)
        indices[indices >= len(self.x)] = len(self.x) - 1
        return self.x[indices]

    def quantile(self, p):
        return self.quantiles([p])[0]

    def median(self):
        return self.quantiles([0.5])[0]

    def moment(self, n):
        '''Calculate the nth moment of this probability distribution

        <x^n> = int_{-inf}^{inf} x^n dF(x)
        '''

        # Pair each abscissa with ITS OWN jump in F.  The previous
        # implementation used x[:-1] * dF, which both dropped the mass at the
        # first abscissa and shifted every weight by one point (e.g. the mean
        # of two equally-weighted samples {0, 1} came out as 0 instead of 0.5).
        jumps = self._jumps()
        if n == 1:
            return (self.x * jumps).sum()
        else:
            return (self.x**n * jumps).sum()

    def cmoment(self, n):
        '''Calculate the nth central moment of this probability distribution'''

        if n < 2:
            return 0
        return ((self.x - self.moment(1)) ** n * self._jumps()).sum()

    def mean(self):
        return self.moment(1)

    def var(self):
        '''Return the second central moment of this probability distribution.'''
        return self.cmoment(2)

    def std(self):
        '''Return the standard deviation (root of the variance) of this probability distribution.'''
        return self.cmoment(2) ** 0.5
@@ -0,0 +1,89 @@
1
+ '''
2
+ Tools for Monte Carlo bootstrap error analysis
3
+ '''
4
+
5
+ import math
6
+
7
+ import numpy as np
8
+
9
+
10
def add_mcbs_options(parser):
    '''Add arguments concerning Monte Carlo bootstrap (``confidence`` and ``bssize``) to the given parser'''

    # All bootstrap-related flags live in their own argument group
    mcbs_group = parser.add_argument_group('bootstrapping options')
    mcbs_group.add_argument(
        '--confidence',
        dest='confidence',
        type=float,
        default=0.95,
        help='Construct a confidence interval of width CONFIDENCE (default: 0.95=95%%)',
    )
    # No default here: when absent, callers derive a size from the confidence level
    mcbs_group.add_argument(
        '--bssize',
        dest='bssize',
        type=int,
        help='Use a bootstrap of BSSIZE samples to calculate error (default: chosen from confidence)',
    )
28
+
29
def get_bssize(alpha):
    '''Return a bootstrap data set size appropriate for the given confidence level'''
    # One order of magnitude beyond what is needed to resolve probabilities of size alpha
    magnitude = math.ceil(-math.log10(alpha)) + 1
    return int(10**magnitude)
32
+
33
+
34
def bootstrap_ci(estimator, data, alpha, n_sets=None, args=(), kwargs=None, sort=None, extended_output=False):
    '''Perform a Monte Carlo bootstrap of a (1-alpha) confidence interval for the given ``estimator``.
    Returns (fhat, ci_lower, ci_upper), where fhat is the result of ``estimator(data, *args, **kwargs)``,
    and ``ci_lower`` and ``ci_upper`` are the lower and upper bounds of the surrounding confidence
    interval, calculated by calling ``estimator(syndata, *args, **kwargs)`` on each synthetic data
    set ``syndata``. If ``n_sets`` is provided, that is the number of synthetic data sets generated,
    otherwise an appropriate size is selected automatically (see ``get_bssize()``).

    ``sort``, if given, is applied to sort the results of calling ``estimator`` on each
    synthetic data set prior to obtaining the confidence interval; the default sorts
    along the first axis.

    Individual entries in synthetic data sets are selected by the first index of ``data``, allowing this
    function to be used on arrays of multidimensional data.

    If ``extended_output`` is True (by default not), instead of returning (fhat, lb, ub), this function returns
    (fhat, lb, ub, ub-lb, abs((ub-lb)/fhat), and max(ub-fhat,fhat-lb)) (that is, the estimated value, the
    lower and upper bounds of the confidence interval, the width of the confidence interval, the relative
    width of the confidence interval, and the symmetrized error bar of the confidence interval).'''

    # Avoid a mutable default argument, and avoid np.msort as a default:
    # np.msort was deprecated in NumPy 1.24 and removed in 2.0, which made the
    # old ``sort=np.msort`` default fail at import time on modern NumPy.
    # np.sort(a, axis=0) is exactly equivalent.
    if kwargs is None:
        kwargs = {}
    if sort is None:
        def sort(a):
            return np.sort(a, axis=0)

    data = np.asanyarray(data)

    fhat = estimator(data, *args, **kwargs)

    # Shape/dtype of the estimator's output, so synthetic results can be stored densely
    try:
        estimator_shape = fhat.shape
    except AttributeError:
        estimator_shape = ()

    try:
        estimator_dtype = fhat.dtype
    except AttributeError:
        estimator_dtype = type(fhat)

    dlen = len(data)
    n_sets = n_sets or get_bssize(alpha)

    f_synth = np.empty((n_sets,) + estimator_shape, dtype=estimator_dtype)

    # Resample with replacement along the first axis of ``data``
    for i in range(0, n_sets):
        indices = np.random.randint(dlen, size=(dlen,))
        f_synth[i] = estimator(data[indices], *args, **kwargs)

    f_synth_sorted = sort(f_synth)
    lbi = int(math.floor(n_sets * alpha / 2))
    # Clamp to the last valid index: ceil() can land exactly on n_sets for
    # coarse bootstraps (e.g. alpha=0.1 with n_sets=10), which used to IndexError
    ubi = min(int(math.ceil(n_sets * (1 - alpha / 2))), n_sets - 1)
    lb = f_synth_sorted[lbi]
    ub = f_synth_sorted[ubi]

    try:
        if extended_output:
            return (fhat, lb, ub, ub - lb, abs((ub - lb) / fhat) if fhat else 0, max(ub - fhat, fhat - lb))
        else:
            return (fhat, lb, ub)
    finally:
        # Do a little explicit memory management
        del f_synth, f_synth_sorted