westpa 2022.13__cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- westpa/__init__.py +14 -0
- westpa/_version.py +21 -0
- westpa/analysis/__init__.py +5 -0
- westpa/analysis/core.py +749 -0
- westpa/analysis/statistics.py +27 -0
- westpa/analysis/trajectories.py +369 -0
- westpa/cli/__init__.py +0 -0
- westpa/cli/core/__init__.py +0 -0
- westpa/cli/core/w_fork.py +152 -0
- westpa/cli/core/w_init.py +230 -0
- westpa/cli/core/w_run.py +77 -0
- westpa/cli/core/w_states.py +212 -0
- westpa/cli/core/w_succ.py +99 -0
- westpa/cli/core/w_truncate.py +68 -0
- westpa/cli/tools/__init__.py +0 -0
- westpa/cli/tools/ploterr.py +506 -0
- westpa/cli/tools/plothist.py +706 -0
- westpa/cli/tools/w_assign.py +597 -0
- westpa/cli/tools/w_bins.py +166 -0
- westpa/cli/tools/w_crawl.py +119 -0
- westpa/cli/tools/w_direct.py +557 -0
- westpa/cli/tools/w_dumpsegs.py +94 -0
- westpa/cli/tools/w_eddist.py +506 -0
- westpa/cli/tools/w_fluxanl.py +376 -0
- westpa/cli/tools/w_ipa.py +832 -0
- westpa/cli/tools/w_kinavg.py +127 -0
- westpa/cli/tools/w_kinetics.py +96 -0
- westpa/cli/tools/w_multi_west.py +414 -0
- westpa/cli/tools/w_ntop.py +213 -0
- westpa/cli/tools/w_pdist.py +515 -0
- westpa/cli/tools/w_postanalysis_matrix.py +82 -0
- westpa/cli/tools/w_postanalysis_reweight.py +53 -0
- westpa/cli/tools/w_red.py +491 -0
- westpa/cli/tools/w_reweight.py +780 -0
- westpa/cli/tools/w_select.py +226 -0
- westpa/cli/tools/w_stateprobs.py +111 -0
- westpa/cli/tools/w_timings.py +113 -0
- westpa/cli/tools/w_trace.py +599 -0
- westpa/core/__init__.py +0 -0
- westpa/core/_rc.py +673 -0
- westpa/core/binning/__init__.py +55 -0
- westpa/core/binning/_assign.c +36018 -0
- westpa/core/binning/_assign.cpython-312-aarch64-linux-gnu.so +0 -0
- westpa/core/binning/_assign.pyx +370 -0
- westpa/core/binning/assign.py +454 -0
- westpa/core/binning/binless.py +96 -0
- westpa/core/binning/binless_driver.py +54 -0
- westpa/core/binning/binless_manager.py +189 -0
- westpa/core/binning/bins.py +47 -0
- westpa/core/binning/mab.py +506 -0
- westpa/core/binning/mab_driver.py +54 -0
- westpa/core/binning/mab_manager.py +197 -0
- westpa/core/data_manager.py +1761 -0
- westpa/core/extloader.py +74 -0
- westpa/core/h5io.py +1079 -0
- westpa/core/kinetics/__init__.py +24 -0
- westpa/core/kinetics/_kinetics.c +45174 -0
- westpa/core/kinetics/_kinetics.cpython-312-aarch64-linux-gnu.so +0 -0
- westpa/core/kinetics/_kinetics.pyx +815 -0
- westpa/core/kinetics/events.py +147 -0
- westpa/core/kinetics/matrates.py +156 -0
- westpa/core/kinetics/rate_averaging.py +266 -0
- westpa/core/progress.py +218 -0
- westpa/core/propagators/__init__.py +54 -0
- westpa/core/propagators/executable.py +592 -0
- westpa/core/propagators/loaders.py +196 -0
- westpa/core/reweight/__init__.py +14 -0
- westpa/core/reweight/_reweight.c +36899 -0
- westpa/core/reweight/_reweight.cpython-312-aarch64-linux-gnu.so +0 -0
- westpa/core/reweight/_reweight.pyx +439 -0
- westpa/core/reweight/matrix.py +126 -0
- westpa/core/segment.py +119 -0
- westpa/core/sim_manager.py +839 -0
- westpa/core/states.py +359 -0
- westpa/core/systems.py +93 -0
- westpa/core/textio.py +74 -0
- westpa/core/trajectory.py +603 -0
- westpa/core/we_driver.py +910 -0
- westpa/core/wm_ops.py +43 -0
- westpa/core/yamlcfg.py +298 -0
- westpa/fasthist/__init__.py +34 -0
- westpa/fasthist/_fasthist.c +38755 -0
- westpa/fasthist/_fasthist.cpython-312-aarch64-linux-gnu.so +0 -0
- westpa/fasthist/_fasthist.pyx +222 -0
- westpa/mclib/__init__.py +271 -0
- westpa/mclib/__main__.py +28 -0
- westpa/mclib/_mclib.c +34610 -0
- westpa/mclib/_mclib.cpython-312-aarch64-linux-gnu.so +0 -0
- westpa/mclib/_mclib.pyx +226 -0
- westpa/oldtools/__init__.py +4 -0
- westpa/oldtools/aframe/__init__.py +35 -0
- westpa/oldtools/aframe/atool.py +75 -0
- westpa/oldtools/aframe/base_mixin.py +26 -0
- westpa/oldtools/aframe/binning.py +178 -0
- westpa/oldtools/aframe/data_reader.py +560 -0
- westpa/oldtools/aframe/iter_range.py +200 -0
- westpa/oldtools/aframe/kinetics.py +117 -0
- westpa/oldtools/aframe/mcbs.py +153 -0
- westpa/oldtools/aframe/output.py +39 -0
- westpa/oldtools/aframe/plotting.py +88 -0
- westpa/oldtools/aframe/trajwalker.py +126 -0
- westpa/oldtools/aframe/transitions.py +469 -0
- westpa/oldtools/cmds/__init__.py +0 -0
- westpa/oldtools/cmds/w_ttimes.py +361 -0
- westpa/oldtools/files.py +34 -0
- westpa/oldtools/miscfn.py +23 -0
- westpa/oldtools/stats/__init__.py +4 -0
- westpa/oldtools/stats/accumulator.py +35 -0
- westpa/oldtools/stats/edfs.py +129 -0
- westpa/oldtools/stats/mcbs.py +96 -0
- westpa/tools/__init__.py +33 -0
- westpa/tools/binning.py +472 -0
- westpa/tools/core.py +340 -0
- westpa/tools/data_reader.py +159 -0
- westpa/tools/dtypes.py +31 -0
- westpa/tools/iter_range.py +198 -0
- westpa/tools/kinetics_tool.py +343 -0
- westpa/tools/plot.py +283 -0
- westpa/tools/progress.py +17 -0
- westpa/tools/selected_segs.py +154 -0
- westpa/tools/wipi.py +751 -0
- westpa/trajtree/__init__.py +4 -0
- westpa/trajtree/_trajtree.c +17829 -0
- westpa/trajtree/_trajtree.cpython-312-aarch64-linux-gnu.so +0 -0
- westpa/trajtree/_trajtree.pyx +130 -0
- westpa/trajtree/trajtree.py +117 -0
- westpa/westext/__init__.py +0 -0
- westpa/westext/adaptvoronoi/__init__.py +3 -0
- westpa/westext/adaptvoronoi/adaptVor_driver.py +214 -0
- westpa/westext/hamsm_restarting/__init__.py +3 -0
- westpa/westext/hamsm_restarting/example_overrides.py +35 -0
- westpa/westext/hamsm_restarting/restart_driver.py +1165 -0
- westpa/westext/stringmethod/__init__.py +11 -0
- westpa/westext/stringmethod/fourier_fitting.py +69 -0
- westpa/westext/stringmethod/string_driver.py +253 -0
- westpa/westext/stringmethod/string_method.py +306 -0
- westpa/westext/weed/BinCluster.py +180 -0
- westpa/westext/weed/ProbAdjustEquil.py +100 -0
- westpa/westext/weed/UncertMath.py +247 -0
- westpa/westext/weed/__init__.py +10 -0
- westpa/westext/weed/weed_driver.py +192 -0
- westpa/westext/wess/ProbAdjust.py +101 -0
- westpa/westext/wess/__init__.py +6 -0
- westpa/westext/wess/wess_driver.py +217 -0
- westpa/work_managers/__init__.py +57 -0
- westpa/work_managers/core.py +396 -0
- westpa/work_managers/environment.py +134 -0
- westpa/work_managers/mpi.py +318 -0
- westpa/work_managers/processes.py +201 -0
- westpa/work_managers/serial.py +28 -0
- westpa/work_managers/threads.py +79 -0
- westpa/work_managers/zeromq/__init__.py +20 -0
- westpa/work_managers/zeromq/core.py +635 -0
- westpa/work_managers/zeromq/node.py +131 -0
- westpa/work_managers/zeromq/work_manager.py +526 -0
- westpa/work_managers/zeromq/worker.py +320 -0
- westpa-2022.13.dist-info/METADATA +179 -0
- westpa-2022.13.dist-info/RECORD +162 -0
- westpa-2022.13.dist-info/WHEEL +7 -0
- westpa-2022.13.dist-info/entry_points.txt +30 -0
- westpa-2022.13.dist-info/licenses/LICENSE +21 -0
- westpa-2022.13.dist-info/top_level.txt +1 -0
|
Binary file
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
|
|
2
|
+
from __future__ import division
|
|
3
|
+
import numpy, sys
|
|
4
|
+
cimport numpy, cython
|
|
5
|
+
from cpython.buffer cimport *
|
|
6
|
+
from cpython.mem cimport *
|
|
7
|
+
from numpy cimport * #PyArray_DATA, PyArray_TYPE
|
|
8
|
+
|
|
9
|
+
# Fused element type for histogram input: the signed/unsigned integer and 32/64-bit float types that histnd dispatches on and _histnd is specialized for.
ctypedef fused real_numeric:
|
|
10
|
+
numpy.int8_t
|
|
11
|
+
numpy.int16_t
|
|
12
|
+
numpy.int32_t
|
|
13
|
+
numpy.int64_t
|
|
14
|
+
numpy.uint8_t
|
|
15
|
+
numpy.uint16_t
|
|
16
|
+
numpy.uint32_t
|
|
17
|
+
numpy.uint64_t
|
|
18
|
+
numpy.float32_t
|
|
19
|
+
numpy.float64_t
|
|
20
|
+
|
|
21
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef histnd(values, binbounds, weights=1.0, out=None, binbound_check = True, ignore_out_of_range=False):
    '''Generate an N-dimensional PDF (or contribution to a PDF) from the given values.

    ``values`` is a NumPy array with one row per point and one column per dimension
    (anything that is not 2-D is first passed through ``numpy.atleast_2d``).
    ``binbounds`` is a list of arrays of boundary values, with one entry for each
    dimension (``values`` must have as many columns as there are entries in ``binbounds``).
    ``weights``, if provided, specifies the weight each value contributes to the
    histogram; this may be a scalar (for equal weights for all values) or a vector of
    the same length as ``values`` (for unequal weights).

    ``out``, if provided, is a writeable float64 array that the weights are accumulated
    into (it is not zeroed first) and that is returned; otherwise a new zeroed array with
    ``len(bounds)-1`` bins per dimension is created. ``out`` must have one axis per
    dimension and at least ``len(bounds)-1`` entries along each axis.

    If ``binbound_check`` is True, then the boundaries are checked for strict positive
    monotonicity; set to False to shave a few microseconds if you know your bin boundaries
    to be monotonically increasing.

    If ``ignore_out_of_range`` is False, a point falling outside the boundaries in any
    dimension raises ValueError; if True, such points are silently skipped.

    Raises TypeError for inputs of unsupported type/rank and ValueError for inconsistent
    boundaries, output shape, or out-of-range values.
    '''

    if values.ndim != 2:
        values = numpy.atleast_2d(values)
    if values.ndim > 2:
        raise TypeError('values must be 2-D')

    cdef:
        Py_ssize_t ndim = values.shape[1]
        int typecode = PyArray_TYPE(values)

    if len(binbounds) != ndim:
        raise ValueError('number of sets of bin boundaries ({}) does not match dimensionality of data ({})'
                         .format(len(binbounds), values.shape[1]))

    if binbound_check:
        for idim in range(ndim):
            dq = numpy.diff(binbounds[idim])
            if (dq <= 0).any():
                raise ValueError('binbounds in dimension {} are not strictly monotonically increasing'.format(idim))

    # Prepare bin boundaries arrays (same dtype as the data, C-contiguous) so that
    # _histnd can read them through raw pointers.
    _binbounds_vectors = numpy.empty((ndim,), numpy.object_)
    _nbounds = numpy.empty((ndim,), numpy.uint32)
    for idim in range(ndim):
        _binbounds = numpy.require(binbounds[idim], values.dtype, 'C')
        if _binbounds.shape[0] < 1:
            # An empty boundary set would make the unsigned bin count wrap in the C loop.
            raise ValueError('no bin boundaries given in dimension {}'.format(idim))
        _binbounds_vectors[idim] = _binbounds
        _nbounds[idim] = _binbounds.shape[0]

    # Prepare output array, if necessary
    if out is None:
        _out = numpy.zeros([len(boundset)-1 for boundset in binbounds], numpy.float64)
    else:
        _out = out
        if _out.dtype != numpy.float64:
            raise TypeError('type of output array must be float64')
        if not _out.flags.writeable:
            raise TypeError('output is not writeable')
        # _histnd writes through raw strides with bounds checking disabled, so an
        # undersized or wrong-rank array would corrupt memory instead of raising.
        if _out.ndim != ndim:
            raise ValueError('output array has {} dimensions but data has {}'.format(_out.ndim, ndim))
        for idim in range(ndim):
            if _out.shape[idim] < _binbounds_vectors[idim].shape[0] - 1:
                raise ValueError('output array is too small in dimension {}: {} < {} bins'
                                 .format(idim, _out.shape[idim], _binbounds_vectors[idim].shape[0] - 1))

    # Prepare weight array
    _weights = numpy.require(weights, numpy.float64, 'C')
    if _weights.shape == ():
        # scalar: expand to one weight per point
        _weights = numpy.empty((len(values),), numpy.float64)
        _weights[:] = weights
    elif _weights.ndim > 1:
        raise TypeError('weight must be scalar or one dimensional')
    elif _weights.shape[0] != values.shape[0]:
        raise TypeError('weights and values must be equal in length')

    # The fused specialization of _histnd must be selected explicitly for each
    # supported element type of ``values``.
    if typecode == NPY_FLOAT32:
        return _histnd[numpy.float32_t](values,
                                        _binbounds_vectors,
                                        <numpy.uint32_t*> PyArray_DATA(_nbounds),
                                        <numpy.float64_t*> PyArray_DATA(_weights),
                                        ignore_out_of_range,
                                        _out)
    elif typecode == NPY_FLOAT64:
        return _histnd[numpy.float64_t](values,
                                        _binbounds_vectors,
                                        <numpy.uint32_t*> PyArray_DATA(_nbounds),
                                        <numpy.float64_t*> PyArray_DATA(_weights),
                                        ignore_out_of_range,
                                        _out)
    elif typecode == NPY_INT8:
        return _histnd[numpy.int8_t](values,
                                     _binbounds_vectors,
                                     <numpy.uint32_t*> PyArray_DATA(_nbounds),
                                     <numpy.float64_t*> PyArray_DATA(_weights),
                                     ignore_out_of_range,
                                     _out)
    elif typecode == NPY_INT16:
        return _histnd[numpy.int16_t](values,
                                      _binbounds_vectors,
                                      <numpy.uint32_t*> PyArray_DATA(_nbounds),
                                      <numpy.float64_t*> PyArray_DATA(_weights),
                                      ignore_out_of_range,
                                      _out)
    elif typecode == NPY_INT32:
        return _histnd[numpy.int32_t](values,
                                      _binbounds_vectors,
                                      <numpy.uint32_t*> PyArray_DATA(_nbounds),
                                      <numpy.float64_t*> PyArray_DATA(_weights),
                                      ignore_out_of_range,
                                      _out)
    elif typecode == NPY_INT64:
        return _histnd[numpy.int64_t](values,
                                      _binbounds_vectors,
                                      <numpy.uint32_t*> PyArray_DATA(_nbounds),
                                      <numpy.float64_t*> PyArray_DATA(_weights),
                                      ignore_out_of_range,
                                      _out)
    elif typecode == NPY_UINT8:
        return _histnd[numpy.uint8_t](values,
                                      _binbounds_vectors,
                                      <numpy.uint32_t*> PyArray_DATA(_nbounds),
                                      <numpy.float64_t*> PyArray_DATA(_weights),
                                      ignore_out_of_range,
                                      _out)
    elif typecode == NPY_UINT16:
        return _histnd[numpy.uint16_t](values,
                                       _binbounds_vectors,
                                       <numpy.uint32_t*> PyArray_DATA(_nbounds),
                                       <numpy.float64_t*> PyArray_DATA(_weights),
                                       ignore_out_of_range,
                                       _out)
    elif typecode == NPY_UINT32:
        return _histnd[numpy.uint32_t](values,
                                       _binbounds_vectors,
                                       <numpy.uint32_t*> PyArray_DATA(_nbounds),
                                       <numpy.float64_t*> PyArray_DATA(_weights),
                                       ignore_out_of_range,
                                       _out)
    elif typecode == NPY_UINT64:
        return _histnd[numpy.uint64_t](values,
                                       _binbounds_vectors,
                                       <numpy.uint32_t*> PyArray_DATA(_nbounds),
                                       <numpy.float64_t*> PyArray_DATA(_weights),
                                       ignore_out_of_range,
                                       _out)
    else:
        raise TypeError('real floating-point or integer input required')
|
|
156
|
+
|
|
157
|
+
@cython.boundscheck(False)
@cython.wraparound(False)
cdef _histnd(real_numeric[:,:] values, object[:] binbounds, numpy.uint32_t* nbounds, double* weights,
             bint ignore_out_of_range, object output):
    '''Bin the values stored in the 2-D array ``values`` with corresponding weights ``weights``
    into the array of bins ``output``. The bin boundaries are specified in ``binbounds``, and the
    length of each set of boundaries is stored in ``nbounds``. A value belongs to bin ``i`` of a
    dimension when ``bounds[i] <= value < bounds[i+1]``. Returns ``output``.

    If a value lies outside the boundaries of some dimension, ValueError is raised unless
    ``ignore_out_of_range`` is set, in which case the point is skipped.

    Pre-conditions:
      * output is backed by 64-bit floating point storage
      * len(binbounds) == len(nbounds) == values.ndim
      * each entry of binbounds exposes a contiguous buffer of ``real_numeric`` items
      * output has at least ``nbounds[i]-1`` entries along axis ``i`` (writes are unchecked)
    '''

    cdef:
        Py_buffer outputview, bbview
        Py_ssize_t ndim = values.shape[1], npts = values.shape[0]
        Py_ssize_t idim, ipt, ibound, nbins
        char* outptr_bytes
        double* outptr
        real_numeric val, lb, ub
        real_numeric** _binbounds
        bint store_value
        bint have_outputview = False

    # Table of raw pointers to the (contiguous) lists of bin boundaries in each dimension
    _binbounds = <real_numeric**> PyMem_Malloc(ndim*sizeof(real_numeric*))
    if not _binbounds:
        raise MemoryError()

    # Everything after the allocation lives inside the try block so that the pointer
    # table is freed even if acquiring one of the buffers raises.
    try:
        for idim in range(ndim):
            PyObject_GetBuffer(binbounds[idim], &bbview, PyBUF_SIMPLE)
            # The pointer stays valid after the release because ``binbounds`` keeps a
            # reference to the exporting object for the duration of this call.
            _binbounds[idim] = <real_numeric*> bbview.buf
            PyBuffer_Release(&bbview)

        # Get a view of our output array, so we can write directly into it
        PyObject_GetBuffer(output, &outputview, PyBUF_STRIDED)
        have_outputview = True

        with nogil:
            # loop over points
            for ipt in range(npts):
                outptr_bytes = <char*> outputview.buf
                store_value = True
                for idim in range(ndim):
                    val = values[ipt,idim]
                    # Signed arithmetic: with an unsigned count, 0 - 1 would wrap around.
                    nbins = <Py_ssize_t> nbounds[idim] - 1
                    for ibound in range(nbins):
                        lb = _binbounds[idim][ibound]
                        ub = _binbounds[idim][ibound+1]
                        if val >= lb and val < ub:
                            # advance to the matching bin along this axis
                            outptr_bytes += outputview.strides[idim] * ibound
                            break
                    else:
                        # no bin in this dimension contains the value
                        if not ignore_out_of_range:
                            with gil:
                                raise ValueError('value {} at index {} out of bin boundaries in dimension {}'
                                                 .format(val,ipt,idim))
                        else:
                            store_value = False
                            # The point is discarded; skip the remaining dimensions
                            # (this break leaves the enclosing ``for idim`` loop).
                            break
                if store_value:
                    outptr = <double*> outptr_bytes
                    outptr[0] += weights[ipt]
        return output
    finally:
        PyMem_Free(_binbounds)
        if have_outputview:
            PyBuffer_Release(&outputview)
|
|
221
|
+
|
|
222
|
+
|
westpa/mclib/__init__.py
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
'''A package for performing Monte Carlo bootstrap estimates of
|
|
2
|
+
statistics.'''
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from numpy.random import Generator, MT19937
|
|
6
|
+
|
|
7
|
+
from ._mclib import mcbs_correltime, get_bssize, mcbs_ci
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def msort(input_array):
    '''Return a copy of ``input_array`` sorted along its first axis.'''
    sorted_copy = np.sort(input_array, axis=0)
    return sorted_copy
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def mcbs_ci_correl(
    estimator_datasets,
    estimator,
    alpha,
    n_sets=None,
    args=None,
    autocorrel_alpha=None,
    autocorrel_n_sets=None,
    subsample=None,
    do_correl=True,
    mcbs_enable=None,
    estimator_kwargs=None,
):
    '''Perform a Monte Carlo bootstrap estimate for the (1-``alpha``) confidence interval
    on the given ``dataset`` with the given ``estimator``.  This routine is appropriate
    for time-correlated data, using the method described in Huber & Kim, "Weighted-ensemble
    Brownian dynamics simulations for protein association reactions" (1996),
    doi:10.1016/S0006-3495(96)79552-8 to determine a statistically-significant correlation time
    and then reducing the dataset by a factor of that correlation time before running a "classic"
    Monte Carlo bootstrap.

    ``estimator_datasets`` is either a dict mapping estimator keyword names to datasets (time
    along axis 0), or a single dataset, which is wrapped as ``{'a': dataset}``. Neither this
    dict nor ``estimator_kwargs`` is modified.

    ``estimator`` is called with keyword arguments only: every dataset under its key, every
    entry of ``estimator_kwargs``, and ``stride`` (so the estimator must accept ``stride``).
    ``args`` is only forwarded to ``mcbs_ci``.

    Returns the tuple produced by ``mcbs_ci`` with the correlation length appended. The
    early-return paths in this function return five elements,
    ``(estimate, ci_lb, ci_ub, sterr, correl_len)``, so ``mcbs_ci`` presumably returns
    ``(estimate, ci_lb, ci_ub, sterr)`` -- TODO confirm against ``_mclib``. Special cases:

      * ``mcbs_enable is False``: no bootstrap is run and
        ``(estimate, estimate, estimate, 0, 1)`` is returned.
      * the correlation length equals the number of finite cumulative estimates (data too
        correlated): the estimate on the full data is returned together with the min, max
        and standard deviation of the cumulative estimates.

    ``n_sets`` is the number of synthetic data sets to generate using the given ``estimator``,
    which will be chosen using `get_bssize()`_ if ``n_sets`` is not given.

    ``autocorrel_alpha`` (which defaults to ``alpha``) can be used to adjust the significance
    level of the autocorrelation calculation. Note that too high a significance level (too low an
    alpha) for evaluating the significance of autocorrelation values can result in a failure to
    detect correlation if the autocorrelation function is noisy.

    The given ``subsample`` function is used, if provided, to subsample the dataset prior to running
    the full Monte Carlo bootstrap. If none is provided, then a random entry from each correlated
    block is used as the value for that block.  Other reasonable choices include ``np.mean``,
    ``np.median``, ``(lambda x: x[0])`` or ``(lambda x: x[-1])``. In particular, using
    ``subsample=np.mean`` will converge to the block averaged mean and standard error,
    while accounting for any non-normality in the distribution of the mean.

    Raises ValueError if ``alpha`` > 0.5 and NameError if the estimator yields no finite value.
    '''

    if alpha > 0.5:
        raise ValueError('alpha ({}) > 0.5'.format(alpha))

    autocorrel_alpha = alpha if not autocorrel_alpha else autocorrel_alpha

    # Work on private copies: 'stride' is written into the kwargs below, and neither the
    # caller's dict nor a shared default may be mutated.
    estimator_kwargs = dict(estimator_kwargs) if estimator_kwargs else {}

    # Older tools pass a bare dataset; wrap it under 'a', the argument name used by
    # estimators such as np.mean and np.median.
    if not isinstance(estimator_datasets, dict):
        estimator_datasets = {'a': estimator_datasets}

    # 'estimator_datasets' holds exactly the inputs that must be sliced/decimated along the
    # time axis; anything that must reach the estimator untouched goes in 'estimator_kwargs'.
    datasets = {}
    dlen = 0
    for key, dset in estimator_datasets.items():
        datasets[key] = np.asanyarray(dset)
        # Length along the time axis; as before, the last dataset determines it.
        dlen = datasets[key].shape[0]
    estimator_datasets = datasets

    n_sets = n_sets or get_bssize(alpha)
    autocorrel_n_sets = autocorrel_n_sets or get_bssize(autocorrel_alpha)

    if mcbs_enable is False:
        # Bootstrap disabled: only the point estimate is wanted (quick analyses), so no
        # error analysis is attempted.
        return_set = estimator(**{**estimator_datasets, **estimator_kwargs, 'stride': 1})
        return return_set, return_set, return_set, 0, 1

    # Cumulative estimates over growing prefixes of the data; their autocorrelation
    # determines the correlation length.
    precalc_kwargs = dict(estimator_kwargs)
    precalc_kwargs['stride'] = 1
    pre_calculated = []
    for block in range(1, dlen + 1):
        for key, dset in estimator_datasets.items():
            precalc_kwargs[key] = dset[0:block]
        pre_calculated.append(estimator(**precalc_kwargs))
    # We need to get rid of any NaNs.
    pre_calculated = np.asanyarray(pre_calculated)
    pre_calculated = pre_calculated[np.isfinite(pre_calculated)]
    # No finite value at all means the estimator is failing to return meaningful numbers.
    # NameError is kept because callers may already catch it.
    if pre_calculated.shape == (0,):
        raise NameError("Looks like the estimator failed.  This is likely a programming issue, and should be reported.")

    if do_correl is True:
        correl_len = mcbs_correltime(pre_calculated, autocorrel_alpha, autocorrel_n_sets)
    else:
        correl_len = 0

    if correl_len == len(pre_calculated):
        # too correlated for meaningful calculations
        full_kwargs = {**estimator_datasets, **estimator_kwargs, 'stride': 1}
        return (
            estimator(**full_kwargs),
            pre_calculated.min(),
            pre_calculated.max(),
            np.std(pre_calculated),
            correl_len,
        )

    # else, do a blocked bootstrap
    stride = correl_len + 1
    # Some estimators may require the stride, so we pass it in.
    estimator_kwargs['stride'] = stride

    if stride == 1:
        # Uncorrelated data: bootstrap the full datasets.
        bootstrap_data = estimator_datasets
    else:
        if not subsample:
            rng = Generator(MT19937())
            subsample = lambda x: x[rng.integers(len(x))]  # noqa: E731
        # Reduce every dataset to one value per block of ``stride`` consecutive entries.
        bootstrap_data = {}
        for key, dset in estimator_datasets.items():
            dset_shape = list(dset.shape)
            dset_shape[0] = dset_shape[0] // stride
            decim_set = np.empty(dset_shape, dtype=dset.dtype)
            for iout, istart in enumerate(range(0, dset.shape[0] - stride + 1, stride)):
                sl = dset[istart : istart + stride]
                # We assume time is the 0th axis. Reducers such as np.mean must be told
                # the axis; callables that do not accept ``axis`` are retried without it.
                try:
                    decim_set[iout] = subsample(sl, axis=0)
                except Exception:
                    decim_set[iout] = subsample(sl)
            bootstrap_data[key] = decim_set
            dlen = dset_shape[0]

    return mcbs_ci(
        dataset=bootstrap_data,
        estimator=estimator,
        alpha=alpha,
        dlen=dlen,
        n_sets=n_sets,
        args=args,
        kwargs=estimator_kwargs,
        sort=msort,
    ) + (correl_len,)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
# These are blocks designed to evaluate simple information sets.
|
|
190
|
+
# Whether they should go here or in westtools is somewhat up for debate.
|
|
191
|
+
# Currently, nothing actually uses them, so there's that.
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _1D_simple_eval_block(
    iblock,
    start,
    stop,
    nstates,
    data_input,
    name,
    mcbs_alpha,
    mcbs_nsets,
    mcbs_acalpha,
    do_correl,
    mcbs_enable,
    subsample=np.mean,
    **extra,
):
    '''Bootstrap the mean of each per-state column of a directly measured dataset.

    For every ``istate`` in ``range(nstates)`` the column ``data_input['dataset'][:, istate]``
    is handed to ``mcbs_ci_correl`` with a mean estimator. Suitable for fluxes, color
    populations, and state populations.

    Returns a list with one ``(name, iblock, istate, (start, stop) + ci_res)`` tuple per
    state, where ``ci_res`` is the tuple returned by ``mcbs_ci_correl``. ``iblock``,
    ``start``, ``stop`` and ``name`` are only copied into the results; ``extra`` is ignored.
    '''

    def _mean_estimator(stride, dataset):
        # ``stride`` is part of the estimator calling convention but is not needed here.
        return np.mean(dataset)

    iter_span = (start, stop)
    results = []
    for istate in range(nstates):
        # Not sure if we need a jstate for these estimators, but we'll see.
        state_series = data_input['dataset'][:, istate]
        ci_res = mcbs_ci_correl(
            {'dataset': state_series},
            estimator=_mean_estimator,
            alpha=mcbs_alpha,
            n_sets=mcbs_nsets,
            autocorrel_alpha=mcbs_acalpha,
            subsample=subsample,
            do_correl=do_correl,
            mcbs_enable=mcbs_enable,
        )
        results.append((name, iblock, istate, iter_span + ci_res))

    return results
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _2D_simple_eval_block(
    iblock,
    start,
    stop,
    nstates,
    data_input,
    name,
    mcbs_alpha,
    mcbs_nsets,
    mcbs_acalpha,
    do_correl,
    mcbs_enable,
    subsample=np.mean,
    **extra,
):
    '''Bootstrap the mean of each off-diagonal (i -> j) series of a directly measured dataset.

    For every ordered pair of distinct states the series
    ``data_input['dataset'][:, istate, jstate]`` is handed to ``mcbs_ci_correl`` with a mean
    estimator. Probably limited to conditional fluxes, but any directly measured i-to-j
    quantity is suitable.

    Returns a list of ``(name, iblock, istate, jstate, (start, stop) + ci_res)`` tuples, where
    ``ci_res`` is the tuple returned by ``mcbs_ci_correl``. ``iblock``, ``start``, ``stop``
    and ``name`` are only copied into the results; ``extra`` is ignored.
    '''

    def _mean_estimator(stride, dataset):
        # ``stride`` is part of the estimator calling convention but is not needed here.
        return np.mean(dataset)

    iter_span = (start, stop)
    results = []
    for istate in range(nstates):
        for jstate in range(nstates):
            # Diagonal entries (i == j) are not transitions and are skipped.
            if istate != jstate:
                pair_series = data_input['dataset'][:, istate, jstate]
                ci_res = mcbs_ci_correl(
                    {'dataset': pair_series},
                    estimator=_mean_estimator,
                    alpha=mcbs_alpha,
                    n_sets=mcbs_nsets,
                    autocorrel_alpha=mcbs_acalpha,
                    subsample=subsample,
                    do_correl=do_correl,
                    mcbs_enable=mcbs_enable,
                )
                results.append((name, iblock, istate, jstate, iter_span + ci_res))

    return results
|
westpa/mclib/__main__.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
if __name__ == '__main__':
    # Ad-hoc self-check: print the normalized autocorrelation of a synthetic signal as
    # computed by scipy.signal.correlate and by this package's autocorrel_elem.

    # NOTE(review): the package __init__ imports only mcbs_correltime, get_bssize and
    # mcbs_ci from ._mclib, so this import looks like it will fail; autocorrel_elem
    # presumably lives in ._mclib -- confirm before changing the import.
    from . import autocorrel_elem
    import numpy
    from scipy.signal import correlate

    n = 16
    x = numpy.linspace(0, n * numpy.pi, 16 * n + 1)
    a = numpy.cos(x) + numpy.exp(-((x / 2.0) ** 2)) + numpy.exp(-(x / 4.0))
    # Zero-padded copy of the signal, passed to autocorrel_elem below.
    pa = numpy.zeros((10000 * len(a),), numpy.float64)
    pa[: len(a)] = a

    print('<a> =', a.mean())
    print('<a^2> =', ((a - a.mean()) ** 2).sum())
    print('scipy.signal.correlate:')
    acf0 = correlate(a, a)
    # Keep the non-negative lags and normalize to the largest value.
    acf0 = acf0[-len(a) :]
    acf0 /= acf0.max()
    # Slice bounds must be integers, so use floor division.
    print(acf0[: len(acf0) // 4])

    print('this module:')
    acf = numpy.array([autocorrel_elem(pa, k) for k in range(len(a))])
    print(acf[: len(acf) // 4])
|