westpa 2022.12__cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of westpa might be problematic. Click here for more details.

Files changed (149)
  1. westpa/__init__.py +14 -0
  2. westpa/_version.py +21 -0
  3. westpa/analysis/__init__.py +5 -0
  4. westpa/analysis/core.py +746 -0
  5. westpa/analysis/statistics.py +27 -0
  6. westpa/analysis/trajectories.py +360 -0
  7. westpa/cli/__init__.py +0 -0
  8. westpa/cli/core/__init__.py +0 -0
  9. westpa/cli/core/w_fork.py +152 -0
  10. westpa/cli/core/w_init.py +230 -0
  11. westpa/cli/core/w_run.py +77 -0
  12. westpa/cli/core/w_states.py +212 -0
  13. westpa/cli/core/w_succ.py +99 -0
  14. westpa/cli/core/w_truncate.py +68 -0
  15. westpa/cli/tools/__init__.py +0 -0
  16. westpa/cli/tools/ploterr.py +506 -0
  17. westpa/cli/tools/plothist.py +706 -0
  18. westpa/cli/tools/w_assign.py +596 -0
  19. westpa/cli/tools/w_bins.py +166 -0
  20. westpa/cli/tools/w_crawl.py +119 -0
  21. westpa/cli/tools/w_direct.py +547 -0
  22. westpa/cli/tools/w_dumpsegs.py +94 -0
  23. westpa/cli/tools/w_eddist.py +506 -0
  24. westpa/cli/tools/w_fluxanl.py +376 -0
  25. westpa/cli/tools/w_ipa.py +833 -0
  26. westpa/cli/tools/w_kinavg.py +127 -0
  27. westpa/cli/tools/w_kinetics.py +96 -0
  28. westpa/cli/tools/w_multi_west.py +414 -0
  29. westpa/cli/tools/w_ntop.py +213 -0
  30. westpa/cli/tools/w_pdist.py +515 -0
  31. westpa/cli/tools/w_postanalysis_matrix.py +82 -0
  32. westpa/cli/tools/w_postanalysis_reweight.py +53 -0
  33. westpa/cli/tools/w_red.py +491 -0
  34. westpa/cli/tools/w_reweight.py +780 -0
  35. westpa/cli/tools/w_select.py +226 -0
  36. westpa/cli/tools/w_stateprobs.py +111 -0
  37. westpa/cli/tools/w_trace.py +599 -0
  38. westpa/core/__init__.py +0 -0
  39. westpa/core/_rc.py +673 -0
  40. westpa/core/binning/__init__.py +55 -0
  41. westpa/core/binning/_assign.cpython-313-x86_64-linux-gnu.so +0 -0
  42. westpa/core/binning/assign.py +455 -0
  43. westpa/core/binning/binless.py +96 -0
  44. westpa/core/binning/binless_driver.py +54 -0
  45. westpa/core/binning/binless_manager.py +190 -0
  46. westpa/core/binning/bins.py +47 -0
  47. westpa/core/binning/mab.py +506 -0
  48. westpa/core/binning/mab_driver.py +54 -0
  49. westpa/core/binning/mab_manager.py +198 -0
  50. westpa/core/data_manager.py +1694 -0
  51. westpa/core/extloader.py +74 -0
  52. westpa/core/h5io.py +995 -0
  53. westpa/core/kinetics/__init__.py +24 -0
  54. westpa/core/kinetics/_kinetics.cpython-313-x86_64-linux-gnu.so +0 -0
  55. westpa/core/kinetics/events.py +147 -0
  56. westpa/core/kinetics/matrates.py +156 -0
  57. westpa/core/kinetics/rate_averaging.py +266 -0
  58. westpa/core/progress.py +218 -0
  59. westpa/core/propagators/__init__.py +54 -0
  60. westpa/core/propagators/executable.py +719 -0
  61. westpa/core/reweight/__init__.py +14 -0
  62. westpa/core/reweight/_reweight.cpython-313-x86_64-linux-gnu.so +0 -0
  63. westpa/core/reweight/matrix.py +126 -0
  64. westpa/core/segment.py +119 -0
  65. westpa/core/sim_manager.py +835 -0
  66. westpa/core/states.py +359 -0
  67. westpa/core/systems.py +93 -0
  68. westpa/core/textio.py +74 -0
  69. westpa/core/trajectory.py +330 -0
  70. westpa/core/we_driver.py +910 -0
  71. westpa/core/wm_ops.py +43 -0
  72. westpa/core/yamlcfg.py +391 -0
  73. westpa/fasthist/__init__.py +34 -0
  74. westpa/fasthist/_fasthist.cpython-313-x86_64-linux-gnu.so +0 -0
  75. westpa/mclib/__init__.py +271 -0
  76. westpa/mclib/__main__.py +28 -0
  77. westpa/mclib/_mclib.cpython-313-x86_64-linux-gnu.so +0 -0
  78. westpa/oldtools/__init__.py +4 -0
  79. westpa/oldtools/aframe/__init__.py +35 -0
  80. westpa/oldtools/aframe/atool.py +75 -0
  81. westpa/oldtools/aframe/base_mixin.py +26 -0
  82. westpa/oldtools/aframe/binning.py +178 -0
  83. westpa/oldtools/aframe/data_reader.py +560 -0
  84. westpa/oldtools/aframe/iter_range.py +200 -0
  85. westpa/oldtools/aframe/kinetics.py +117 -0
  86. westpa/oldtools/aframe/mcbs.py +153 -0
  87. westpa/oldtools/aframe/output.py +39 -0
  88. westpa/oldtools/aframe/plotting.py +90 -0
  89. westpa/oldtools/aframe/trajwalker.py +126 -0
  90. westpa/oldtools/aframe/transitions.py +469 -0
  91. westpa/oldtools/cmds/__init__.py +0 -0
  92. westpa/oldtools/cmds/w_ttimes.py +361 -0
  93. westpa/oldtools/files.py +34 -0
  94. westpa/oldtools/miscfn.py +23 -0
  95. westpa/oldtools/stats/__init__.py +4 -0
  96. westpa/oldtools/stats/accumulator.py +35 -0
  97. westpa/oldtools/stats/edfs.py +129 -0
  98. westpa/oldtools/stats/mcbs.py +96 -0
  99. westpa/tools/__init__.py +33 -0
  100. westpa/tools/binning.py +472 -0
  101. westpa/tools/core.py +340 -0
  102. westpa/tools/data_reader.py +159 -0
  103. westpa/tools/dtypes.py +31 -0
  104. westpa/tools/iter_range.py +198 -0
  105. westpa/tools/kinetics_tool.py +340 -0
  106. westpa/tools/plot.py +283 -0
  107. westpa/tools/progress.py +17 -0
  108. westpa/tools/selected_segs.py +154 -0
  109. westpa/tools/wipi.py +751 -0
  110. westpa/trajtree/__init__.py +4 -0
  111. westpa/trajtree/_trajtree.cpython-313-x86_64-linux-gnu.so +0 -0
  112. westpa/trajtree/trajtree.py +117 -0
  113. westpa/westext/__init__.py +0 -0
  114. westpa/westext/adaptvoronoi/__init__.py +3 -0
  115. westpa/westext/adaptvoronoi/adaptVor_driver.py +214 -0
  116. westpa/westext/hamsm_restarting/__init__.py +3 -0
  117. westpa/westext/hamsm_restarting/example_overrides.py +35 -0
  118. westpa/westext/hamsm_restarting/restart_driver.py +1165 -0
  119. westpa/westext/stringmethod/__init__.py +11 -0
  120. westpa/westext/stringmethod/fourier_fitting.py +69 -0
  121. westpa/westext/stringmethod/string_driver.py +253 -0
  122. westpa/westext/stringmethod/string_method.py +306 -0
  123. westpa/westext/weed/BinCluster.py +180 -0
  124. westpa/westext/weed/ProbAdjustEquil.py +100 -0
  125. westpa/westext/weed/UncertMath.py +247 -0
  126. westpa/westext/weed/__init__.py +10 -0
  127. westpa/westext/weed/weed_driver.py +192 -0
  128. westpa/westext/wess/ProbAdjust.py +101 -0
  129. westpa/westext/wess/__init__.py +6 -0
  130. westpa/westext/wess/wess_driver.py +217 -0
  131. westpa/work_managers/__init__.py +57 -0
  132. westpa/work_managers/core.py +396 -0
  133. westpa/work_managers/environment.py +134 -0
  134. westpa/work_managers/mpi.py +318 -0
  135. westpa/work_managers/processes.py +187 -0
  136. westpa/work_managers/serial.py +28 -0
  137. westpa/work_managers/threads.py +79 -0
  138. westpa/work_managers/zeromq/__init__.py +20 -0
  139. westpa/work_managers/zeromq/core.py +641 -0
  140. westpa/work_managers/zeromq/node.py +131 -0
  141. westpa/work_managers/zeromq/work_manager.py +526 -0
  142. westpa/work_managers/zeromq/worker.py +320 -0
  143. westpa-2022.12.dist-info/AUTHORS +22 -0
  144. westpa-2022.12.dist-info/LICENSE +21 -0
  145. westpa-2022.12.dist-info/METADATA +193 -0
  146. westpa-2022.12.dist-info/RECORD +149 -0
  147. westpa-2022.12.dist-info/WHEEL +6 -0
  148. westpa-2022.12.dist-info/entry_points.txt +29 -0
  149. westpa-2022.12.dist-info/top_level.txt +1 -0
@@ -0,0 +1,361 @@
1
+ import argparse
2
+ import logging
3
+
4
+ import numpy as np
5
+ from numpy.random import Generator, MT19937
6
+
7
+ import westpa
8
+
9
+ from oldtools.aframe import (
10
+ WESTAnalysisTool,
11
+ BinningMixin,
12
+ WESTDataReaderMixin,
13
+ IterRangeMixin,
14
+ MCBSMixin,
15
+ TransitionAnalysisMixin,
16
+ KineticsAnalysisMixin,
17
+ CommonOutputMixin,
18
+ BFDataManager,
19
+ BFTransitionAnalysisMixin,
20
+ )
21
+
22
+ log = logging.getLogger('w_ttimes')
23
+
24
+
25
+ ciinfo_dtype = np.dtype([('expectation', np.float64), ('ci_lower', np.float64), ('ci_upper', np.float64)])
26
+
27
+
28
class WTTimesBase:
    '''Shared machinery for ``w_ttimes`` transition-kinetics analyses.

    Computes expectation values and Monte Carlo bootstrap confidence intervals
    for event durations, first passage times, fluxes, and rates between
    analysis bins, from previously generated transition data, then stores the
    results in the ``w_ttimes`` analysis HDF5 group and writes text summaries.

    Concrete subclasses mix this with analysis mixins that supply (among
    others) ``bf_mode``, ``dt``, ``n_bins``, ``mcbs_nsets``, ``mcbs_alpha``,
    ``trans_h5group``, ``analysis_initial_bins``, ``analysis_final_bins``,
    and the ``require_*``/``record_*`` helpers used below.
    '''

    def __init__(self):
        super().__init__()

        # Output filenames; filled in from command-line args by process_args()
        self.ed_stats_filename = None
        self.fpt_stats_filename = None
        self.flux_stats_filename = None
        self.rate_stats_filename = None
        self.suppress_headers = None
        self.print_bin_labels = None

        # HDF5 group where computed statistics are stored (set in main())
        self.ttimes_group = None

        # Cached statistics arrays of ciinfo_dtype; filled by gen_stats()
        self.durations = None
        self.fpts = None
        self.fluxes = None
        self.rates = None

        # RNG used for bootstrap resampling in gen_stats()
        self.rng = Generator(MT19937())

    def add_args(self, parser, upcall=True):
        '''Add arguments to a parser common to all analyses of this type.

        If ``upcall`` is true, chain to the next ``add_args`` in the MRO
        first (if any), so mixin options appear before the output options.
        '''
        if upcall:
            try:
                upfunc = super().add_args
            except AttributeError:
                pass
            else:
                upfunc(parser)

        output_options = parser.add_argument_group('kinetics analysis output options')
        output_options.add_argument(
            '--edstats',
            dest='ed_stats',
            default='edstats.txt',
            help='Store event duration statistics in ED_STATS (default: edstats.txt)',
        )
        # bf_mode is provided by the concrete subclass (brute force vs WE);
        # FPT statistics are only meaningful by default for brute-force data.
        if self.bf_mode:
            output_options.add_argument(
                '--fptstats',
                dest='fpt_stats',
                default='fptstats.txt',
                help='Store first passage time statistics in FPT_STATS (default: fptstats.txt).',
            )
        else:
            output_options.add_argument(
                '--fptstats', dest='fpt_stats', help='Store first passage time statistics in FPT_STATS (default: do not store).'
            )
        output_options.add_argument(
            '--fluxstats',
            dest='flux_stats',
            default='fluxstats.txt',
            help='Store flux statistics in FLUX_STATS (default: fluxstats.txt)',
        )
        output_options.add_argument(
            '--ratestats',
            dest='rate_stats',
            default='ratestats.txt',
            help='Store rate statistics in RATE_STATS (default: ratestats.txt)',
        )
        self.add_common_output_args(output_options)

    def process_args(self, args, upcall=True):
        '''Store parsed output options on the instance; chain to the next
        ``process_args`` in the MRO afterwards if ``upcall`` is true.'''
        self.ed_stats_filename = args.ed_stats
        self.fpt_stats_filename = args.fpt_stats
        self.flux_stats_filename = args.flux_stats
        self.rate_stats_filename = args.rate_stats
        self.process_common_output_args(args)
        # FPTs are only calculated when an output file was requested
        self.calc_fpts = bool(args.fpt_stats)

        if upcall:
            try:
                upfunc = super().process_args
            except AttributeError:
                pass
            else:
                upfunc(args)

    def gen_stats(self):
        '''Calculate duration/FPT/flux/rate expectations and Monte Carlo
        bootstrap confidence intervals for every (initial, final) bin pair,
        and store the results as datasets in ``self.ttimes_group``.'''
        self.require_transitions_group()
        westpa.rc.pstatus('Analyzing transition statistics...')

        dt = self.dt
        n_sets = self.mcbs_nsets
        lbi, ubi = self.calc_ci_bound_indices()

        total_time = self.get_total_time()
        transdat_ds = self.trans_h5group['transitions']
        transdat_ibin = transdat_ds['initial_bin']

        if not self.bf_mode:
            # WE data: restrict transitions to the selected iteration range
            transdat_niter = transdat_ds['n_iter']
            transdat_in_range = (transdat_niter >= self.first_iter) & (transdat_niter <= self.last_iter)

        durations = np.zeros((self.n_bins, self.n_bins), ciinfo_dtype)
        fpts = np.zeros((self.n_bins, self.n_bins), ciinfo_dtype)
        fluxes = np.zeros((self.n_bins, self.n_bins), ciinfo_dtype)
        rates = np.zeros((self.n_bins, self.n_bins), ciinfo_dtype)

        syn_avg_durations = np.empty((n_sets,), np.float64)
        syn_avg_fpts = np.empty((n_sets,), np.float64)
        syn_avg_fluxes = np.empty((n_sets,), np.float64)
        syn_avg_rates = np.empty((n_sets,), np.float64)

        # Field widths for the progress display
        w_n_bins = len(str(self.n_bins))
        w_n_sets = len(str(n_sets))

        for ibin in self.analysis_initial_bins:
            if self.bf_mode:
                trans_ibin = transdat_ds[transdat_ibin == ibin]
            else:
                trans_ibin = transdat_ds[(transdat_ibin == ibin) & transdat_in_range]

            for fbin in self.analysis_final_bins:
                # extract() avoids a second fancy-indexing pass over the structured array
                trans_ifbins = np.extract(trans_ibin['final_bin'] == fbin, trans_ibin)
                dlen = len(trans_ifbins)

                if not dlen:
                    continue

                trans_weights = trans_ifbins['final_weight']
                trans_durations = trans_ifbins['duration']
                trans_fpts = trans_ifbins['fpt']
                trans_ibinprobs = trans_ifbins['initial_bin_pop']

                durations[ibin, fbin]['expectation'] = np.average(trans_durations, weights=trans_weights) * dt
                fpts[ibin, fbin]['expectation'] = np.average(trans_fpts, weights=trans_weights) * dt
                avg_flux = trans_weights.sum() / total_time
                fluxes[ibin, fbin]['expectation'] = avg_flux
                rates[ibin, fbin]['expectation'] = avg_flux / trans_ibinprobs.mean()

                for iset in range(n_sets):
                    westpa.rc.pstatus(
                        '\r {:{w_n_bins}d}->{:<{w_n_bins}d} set {:{w_n_sets}d}/{:<{w_n_sets}d}, set size {:<20d}'.format(
                            ibin, fbin, iset + 1, n_sets, dlen, w_n_bins=w_n_bins, w_n_sets=w_n_sets
                        ),
                        end='',
                    )
                    westpa.rc.pflush()
                    # Bootstrap: resample the transitions with replacement
                    indices = self.rng.integers(dlen, size=(dlen,))
                    syn_weights = trans_weights.take(indices)
                    syn_durations = trans_durations.take(indices)
                    syn_fpts = trans_fpts.take(indices)
                    syn_ibinprobs = trans_ibinprobs.take(indices)

                    syn_avg_durations[iset] = np.average(syn_durations, weights=syn_weights) * dt
                    syn_avg_fpts[iset] = np.average(syn_fpts, weights=syn_weights) * dt
                    syn_avg_fluxes[iset] = syn_weights.sum() / total_time
                    syn_avg_rates[iset] = syn_avg_fluxes[iset] / syn_ibinprobs.mean()

                    del indices, syn_weights, syn_durations, syn_ibinprobs, syn_fpts

                # Order statistics of the synthetic averages give the CI bounds
                syn_avg_durations.sort()
                syn_avg_fpts.sort()
                syn_avg_fluxes.sort()
                syn_avg_rates.sort()

                durations[ibin, fbin]['ci_lower'] = syn_avg_durations[lbi]
                durations[ibin, fbin]['ci_upper'] = syn_avg_durations[ubi]

                fpts[ibin, fbin]['ci_lower'] = syn_avg_fpts[lbi]
                fpts[ibin, fbin]['ci_upper'] = syn_avg_fpts[ubi]

                fluxes[ibin, fbin]['ci_lower'] = syn_avg_fluxes[lbi]
                fluxes[ibin, fbin]['ci_upper'] = syn_avg_fluxes[ubi]

                rates[ibin, fbin]['ci_lower'] = syn_avg_rates[lbi]
                rates[ibin, fbin]['ci_upper'] = syn_avg_rates[ubi]

                del trans_weights, trans_durations, trans_ibinprobs, trans_ifbins, trans_fpts
            westpa.rc.pstatus()
            del trans_ibin

        for dsname, data in (('duration', durations), ('fpt', fpts), ('flux', fluxes), ('rate', rates)):
            try:
                del self.ttimes_group[dsname]
            except KeyError:
                pass

            ds = self.ttimes_group.create_dataset(dsname, data=data)
            attrs = ds.attrs
            attrs['dt'] = dt
            attrs['total_time'] = total_time
            attrs['ci_alpha'] = self.mcbs_alpha
            attrs['ci_n_sets'] = self.mcbs_nsets

            if not self.bf_mode:
                self.record_data_iter_range(ds)
                self.record_data_binhash(ds)

        # Annotate the containing group as well.
        # BUGFIX: previously ``attrs = self.ttimes_group.attrs`` was
        # immediately rebound to ``ds.attrs``, so the group-level attributes
        # were never written (the last dataset's were rewritten instead).
        attrs = self.ttimes_group.attrs
        attrs['dt'] = dt
        attrs['total_time'] = total_time
        attrs['ci_alpha'] = self.mcbs_alpha
        attrs['ci_n_sets'] = self.mcbs_nsets

        self.durations = durations
        # BUGFIX: fpts were computed but never cached on the instance,
        # forcing summarize_stats() to re-read them from the HDF5 group.
        self.fpts = fpts
        self.fluxes = fluxes
        self.rates = rates

        if not self.bf_mode:
            self.record_data_iter_range(self.ttimes_group)
            self.record_data_binhash(self.ttimes_group)

    def summarize_stats(self):
        '''Write text summaries of each statistic for which an output filename
        was configured, reading from the HDF5 group if the in-memory array is
        not available.'''
        for array, dsname, argname, title in (
            (self.durations, 'duration', 'ed_stats_filename', 'event duration'),
            (self.fpts, 'fpt', 'fpt_stats_filename', 'first passage time'),
            (self.fluxes, 'flux', 'flux_stats_filename', 'flux'),
            (self.rates, 'rate', 'rate_stats_filename', 'rate'),
        ):
            filename = getattr(self, argname)
            if filename:
                if array is None:
                    try:
                        array = self.ttimes_group[dsname]
                    except KeyError:
                        westpa.rc.pstatus('{} data not found in {}'.format(title, self.anal_h5name))
                        continue

                self.summarize_ci(
                    filename,
                    array,
                    title,
                    self.mcbs_display_confidence,
                    headers=(not self.suppress_headers),
                    labels=self.print_bin_labels,
                )

    def summarize_ci(self, filename, array, title, confidence, headers, labels):
        '''Write a text table of expectation values and confidence intervals
        for every (initial, final) bin pair in ``array`` to ``filename``.

        ``array`` is indexable as ``array[ibin, fbin][field]`` with fields
        ``expectation``/``ci_lower``/``ci_upper`` (ciinfo_dtype or an
        equivalent HDF5 dataset).
        '''
        format_2d = (
            '{ibin:{mw}d} {fbin:{mw}d} {0:20.15g} {1:20.15g} {2:20.15g} {3:20.15g} {4:20.15g} {5:20.15g}\n'
        )
        max_ibin_width = len(str(self.n_bins - 1))

        # BUGFIX: the output file was previously opened without ever being
        # closed; use a context manager so it is closed on all paths.
        with open(filename, 'wt') as outfile:
            if headers:
                # BUGFIX: the column legend previously omitted the average
                # (column 2), mislabeling every subsequent column.
                outfile.write(
                    '''\
# {title:} statistics
# confidence interval = {confidence}%
# ----
# column 0: initial bin index
# column 1: final bin index
# column 2: average value
# column 3: lower bound of confidence interval
# column 4: upper bound of confidence interval
# column 5: width of confidence interval
# column 6: relative width of confidence interval [abs(width/average)]
# column 7: symmetrized error [max(upper-average, average-lower)]
# ----
'''.format(
                        title=title, confidence=confidence
                    )
                )
                if labels:
                    self.write_bin_labels(outfile)
                    outfile.write('----\n')

            for ibin in self.analysis_initial_bins:
                for fbin in self.analysis_final_bins:
                    mean = array[ibin, fbin]['expectation']
                    lb = array[ibin, fbin]['ci_lower']
                    ub = array[ibin, fbin]['ci_upper']
                    ciwidth = ub - lb
                    relciwidth = abs(ciwidth / mean)
                    symmerr = max(mean - lb, ub - mean)

                    outfile.write(
                        format_2d.format(
                            *list(map(float, (mean, lb, ub, ciwidth, relciwidth, symmerr))), ibin=ibin, fbin=fbin, mw=max_ibin_width
                        )
                    )

    def main(self):
        '''Command-line entry point: parse arguments, require the needed
        analysis inputs, compute statistics, and write summaries.'''
        parser = argparse.ArgumentParser('w_ttimes', description=self.description)
        westpa.rc.add_args(parser)
        self.add_args(parser)

        args = parser.parse_args()
        westpa.rc.process_args(args, config_required=False)
        self.process_args(args)

        self.check_iter_range()
        self.check_bin_selection()
        self.open_analysis_backing()
        self.ttimes_group = self.require_analysis_group('w_ttimes', replace=False)
        self.require_bin_assignments()
        self.require_transitions()
        self.gen_stats()
        self.summarize_stats()
324
+
325
+
326
class WTTimesWE(
    WTTimesBase,
    CommonOutputMixin,
    MCBSMixin,
    KineticsAnalysisMixin,
    TransitionAnalysisMixin,
    BinningMixin,
    IterRangeMixin,
    WESTDataReaderMixin,
    WESTAnalysisTool,
):
    '''Transition kinetics analysis over weighted-ensemble (WE) simulation data.

    All behavior comes from WTTimesBase and the analysis mixins; the base-class
    order above fixes how the chained ``add_args``/``process_args`` calls
    resolve through the MRO.
    '''

    description = 'Trace the WEST trajectory tree and report on transition kinetics.'

    def __init__(self):
        # No state beyond what the base class and mixins establish.
        super().__init__()
341
+
342
+
343
class WTTimesBF(
    WTTimesBase, CommonOutputMixin, MCBSMixin, KineticsAnalysisMixin, BFTransitionAnalysisMixin, BFDataManager, WESTAnalysisTool
):
    '''Transition kinetics analysis over brute-force (conventional) trajectories.

    Unlike the WE variant, there is no iteration-range selection; trajectory
    data is accessed through the brute-force HDF5 file (BFDataManager).
    '''

    description = 'Trace one or more brute force trajectories and report on transition kinetics.'
    # Chunk size for reading trajectory data
    # (presumably consumed by BFTransitionAnalysisMixin -- defined outside this view)
    default_chunksize = 65536 * 4

    def __init__(self):
        super().__init__()
        self.bf_mode = True  # selects the brute-force code paths in WTTimesBase
        self.config_required = False  # no WEST run configuration needed for BF analysis
        self.usecols = None  # columns to read from input files; set elsewhere
        self.input_files = None  # brute-force trajectory inputs; set elsewhere

    def check_iter_range(self):
        # Brute-force data has no WE iterations, so there is no range to validate.
        pass  # do nothing, since we don't do iteration ranges for brute force

    def get_total_time(self):
        '''Return the total simulated time: the sum over all trajectories of
        (trajectory length - 1) transitions, multiplied by the timestep dt.'''
        self.require_bf_h5file()
        return np.add.reduce([self.get_traj_len(traj_id) - 1 for traj_id in range(self.get_n_trajs())]) * self.dt
@@ -0,0 +1,34 @@
1
+ import warnings
2
+ import numpy
3
+
4
+
5
def load_npy_or_text(filename):
    '''Load an array from an existing .npy file, or read a text file and
    convert to a NumPy array. In either case, return a NumPy array. If a
    pickled NumPy dataset is found, memory-map it read-only. If the specified
    file does not contain a pickled NumPy array, attempt to read the file using
    ``numpy.loadtxt(filename)``.'''

    # Probe whether the file is seekable; if not, assume a text stream.
    # BUGFIX: the probe handle is now always closed (it previously leaked on
    # the non-seekable path).
    with open(filename, 'rb') as f:
        try:
            f.seek(0)
        except IOError:
            # Not seekable - assume a text stream
            return numpy.loadtxt(filename)

    # File is seekable; try to memory-map it as a .npy file
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            return numpy.load(filename, 'r')
    except (IOError, ValueError) as e:
        # BUGFIX: modern NumPy raises ValueError (e.g. "Cannot load file
        # containing pickled data...") rather than IOError ("Failed to
        # interpret...") for non-.npy content; accept both and fall through.
        if 'Failed to interpret' in str(e) or 'Cannot load file' in str(e):
            pass
        else:
            raise

    return numpy.loadtxt(filename)
@@ -0,0 +1,23 @@
1
+ '''Miscellaneous support functions for WEST and WEST tools'''
2
+
3
+ import re
4
+
5
+
6
def parse_int_list(list_string):
    '''Parse a simple list consisting of integers or ranges of integers separated by commas.
    Ranges are specified as min:max and include the maximum value (unlike Python's ``range``).
    Duplicate values are ignored. Returns the result as a sorted list. Raises ValueError if
    the list cannot be parsed.'''

    try:
        collected = set()
        for token in re.split(r'\s*[;,]\s*', list_string):
            if ':' not in token:
                collected.add(int(token))
            else:
                low, high = (int(bound) for bound in re.split(r'\s*:\s*', token))
                collected.update(range(low, high + 1))
    except (ValueError, TypeError):
        raise ValueError('invalid integer range string {!r}'.format(list_string))
    else:
        return sorted(collected)
@@ -0,0 +1,4 @@
1
+ from . import accumulator # noqa
2
+ from .accumulator import RunningStatsAccumulator # noqa
3
+
4
+ from . import mcbs # noqa
@@ -0,0 +1,35 @@
1
+ import numpy
2
+
3
NAN = float('nan')


class RunningStatsAccumulator:
    '''Accumulate weighted running statistics (mean and standard deviation)
    over an array of independent cells.

    Each call to ``incorporate()`` folds one weighted observation into one
    cell; ``average()`` and ``std()`` then report per-cell statistics, with
    ``mask_value`` filling cells that never received any data.
    '''

    def __init__(self, shape, dtype=numpy.float64, count_dtype=numpy.uint, weight_dtype=numpy.float64, mask_value=NAN):
        self.sum = numpy.zeros(shape, dtype)  # per-cell sum of weight*value
        self.sqsum = numpy.zeros(shape, dtype)  # per-cell sum of weight*value**2
        self.weight = numpy.zeros(shape, weight_dtype)  # per-cell total weight
        self.count = numpy.zeros(shape, count_dtype)  # per-cell observation count
        self.mask_value = mask_value

    def incorporate(self, index, value, weight):
        '''Fold one observation ``value`` with weight ``weight`` into cell ``index``.'''
        weighted = weight * value
        self.count[index] += 1
        self.weight[index] += weight
        self.sum[index] += weighted
        self.sqsum[index] += weighted * value

    def average(self):
        '''Return per-cell weighted means; cells with no data get ``mask_value``.'''
        populated = self.count > 0
        result = numpy.empty_like(self.sum)
        result[~populated] = self.mask_value
        result[populated] = self.sum[populated] / self.weight[populated]
        return result

    # Alias retained for API compatibility
    mean = average

    def std(self):
        '''Return per-cell weighted standard deviations; cells with no data get ``mask_value``.'''
        populated = self.count > 0
        cell_means = self.average()[populated]
        result = numpy.empty_like(self.sqsum)
        result[~populated] = self.mask_value
        result[populated] = (self.sqsum[populated] / self.weight[populated] - cell_means * cell_means) ** 0.5
        return result
@@ -0,0 +1,129 @@
1
+ import numpy
2
+
3
+
4
class EDF:
    '''A class for creating and manipulating empirical distribution functions (cumulative
    distribution functions derived from sample data).
    '''

    @staticmethod
    def from_array(array):
        '''Reconstruct an EDF from an (N, 2) array of (x, F) pairs, as produced
        by ``as_array()``.'''
        edf = EDF(None, None)
        edf.x = array[:, 0]
        edf.F = array[:, 1]
        edf.dF = numpy.diff(edf.F)
        return edf

    @staticmethod
    def from_arrays(x, F):
        '''Reconstruct an EDF from separate abscissa (``x``) and CDF value (``F``) arrays.'''
        edf = EDF(None, None)
        edf.x = x
        edf.F = F
        edf.dF = numpy.diff(edf.F)
        return edf

    def __init__(self, values, weights=None):
        '''Construct a new EDF from the given values and (optionally) weights.

        ``values`` may be any array-like. ``weights`` may be None (equal
        weights), a scalar (the same weight for every sample), or a sequence
        of the same length as ``values``.
        '''

        if values is None:
            # Empty shell; populated by the from_array()/from_arrays() factories
            self.x = None
            self.F = None
            self.dF = None
            return

        # BUGFIX: coerce array-likes (e.g. plain Python lists) to ndarrays;
        # the fancy indexing below (values[sort_indices]) requires ndarray
        # semantics and previously raised TypeError for list input.
        values = numpy.asanyarray(values)

        if weights is None:
            weights = numpy.ones((len(values)), numpy.float64)
        elif numpy.isscalar(weights):
            tweights = numpy.empty((len(values)), numpy.float64)
            tweights[:] = weights
            weights = tweights
        else:
            weights = numpy.asanyarray(weights)
            if len(weights) != len(values):
                raise TypeError('values and weights have different lengths')

        # Sort values (stable keeps ties in input order, pairing weights correctly)
        sort_indices = numpy.argsort(values, kind='stable')
        values = values[sort_indices]
        weights = weights[sort_indices]

        # Determine unique abcissae; this is essentially stolen from numpy.lib.arraysetops.unique()
        x = values[numpy.concatenate(([True], values[1:] != values[:-1]))]
        F = numpy.empty((len(x),), numpy.float64)

        # ``values`` is arranged in increasing order, so we can walk along it and add up weights
        # as we go
        ival_last = 0
        ival = 0
        for ibin in range(0, len(x)):
            while ival < len(values) and values[ival] <= x[ibin]:
                ival += 1
            F[ibin] = weights[ival_last:ival].sum()
            ival_last = ival
        F = numpy.add.accumulate(F)
        # Normalize so the CDF ends at exactly 1
        F /= F[-1]

        self.x = x
        self.F = F
        self.dF = numpy.diff(F)

    def __len__(self):
        return len(self.x)

    def __call__(self, x):
        '''Evaluate this EDF at the given abcissae.'''
        indices = numpy.digitize(x, self.x)
        # Clamp out-of-range points to the last (rightmost) CDF value
        indices[indices >= len(self.x)] = len(self.x) - 1
        return self.F[indices]

    def as_array(self):
        '''Return this EDF as a (N,2) array, where N is the number of unique values passed to
        the constructor. Numpy type casting rules are applied (so, for instance, integral abcissae
        are converted to floating-point values).'''

        result = numpy.empty((len(self.F), 2), dtype=numpy.result_type(self.x, self.F))
        result[:, 0] = self.x
        result[:, 1] = self.F
        return result

    def quantiles(self, p):
        '''Treating the EDF as a quantile function, return the values of the (statistical) variable whose
        probabilities are at least p. That is, Q(p) = inf {x: p <= F(x) }.'''

        indices = numpy.searchsorted(self.F, p)
        indices[indices >= len(self.x)] = len(self.x) - 1
        return self.x[indices]

    def quantile(self, p):
        '''Return the single quantile Q(p); see ``quantiles()``.'''
        return self.quantiles([p])[0]

    def median(self):
        '''Return the median (0.5 quantile) of this distribution.'''
        return self.quantiles([0.5])[0]

    def moment(self, n):
        '''Calculate the nth moment of this probability distribution

        <x^n> = int_{-inf}^{inf} x^n dF(x)
        '''

        if n == 1:
            return (self.x[:-1] * self.dF).sum()
        else:
            return (self.x[:-1] ** n * self.dF).sum()

    def cmoment(self, n):
        '''Calculate the nth central moment of this probability distribution'''

        if n < 2:
            return 0
        return ((self.x[:-1] - self.moment(1)) ** n * self.dF).sum()

    def mean(self):
        '''Return the first moment of this probability distribution.'''
        return self.moment(1)

    def var(self):
        '''Return the second central moment of this probability distribution.'''
        return self.cmoment(2)

    def std(self):
        '''Return the standard deviation (root of the variance) of this probability distribution.'''
        return self.cmoment(2) ** 0.5
@@ -0,0 +1,96 @@
1
+ '''
2
+ Tools for Monte Carlo bootstrap error analysis
3
+ '''
4
+
5
+ import math
6
+
7
+ import numpy as np
8
+ from numpy.random import Generator, MT19937
9
+
10
+
11
def msort(input_array):
    '''Return a copy of *input_array* sorted along its first axis.'''
    sorted_copy = np.sort(input_array, axis=0)
    return sorted_copy
13
+
14
+
15
+ def add_mcbs_options(parser):
16
+ '''Add arguments concerning Monte Carlo bootstrap (``confidence`` and ``bssize``) to the given parser'''
17
+
18
+ group = parser.add_argument_group('bootstrapping options')
19
+ group.add_argument(
20
+ '--confidence',
21
+ dest='confidence',
22
+ type=float,
23
+ default=0.95,
24
+ help='Construct a confidence interval of width CONFIDENCE (default: 0.95=95%%)',
25
+ )
26
+ group.add_argument(
27
+ '--bssize',
28
+ dest='bssize',
29
+ type=int,
30
+ help='Use a bootstrap of BSSIZE samples to calculate error (default: chosen from confidence)',
31
+ )
32
+
33
+
34
+ def get_bssize(alpha):
35
+ '''Return a bootstrap data set size appropriate for the given confidence level'''
36
+ return int(10 ** (math.ceil(-math.log10(alpha)) + 1))
37
+
38
+
39
+ def bootstrap_ci(estimator, data, alpha, n_sets=None, args=(), kwargs={}, sort=msort, extended_output=False):
40
+ '''Perform a Monte Carlo bootstrap of a (1-alpha) confidence interval for the given ``estimator``.
41
+ Returns (fhat, ci_lower, ci_upper), where fhat is the result of ``estimator(data, *args, **kwargs)``,
42
+ and ``ci_lower`` and ``ci_upper`` are the lower and upper bounds of the surrounding confidence
43
+ interval, calculated by calling ``estimator(syndata, *args, **kwargs)`` on each synthetic data
44
+ set ``syndata``. If ``n_sets`` is provided, that is the number of synthetic data sets generated,
45
+ otherwise an appropriate size is selected automatically (see ``get_bssize()``).
46
+
47
+ ``sort``, if given, is applied to sort the results of calling ``estimator`` on each
48
+ synthetic data set prior to obtaining the confidence interval.
49
+
50
+ Individual entries in synthetic data sets are selected by the first index of ``data``, allowing this
51
+ function to be used on arrays of multidimensional data.
52
+
53
+ If ``extended_output`` is True (by default not), instead of returning (fhat, lb, ub), this function returns
54
+ (fhat, lb, ub, ub-lb, abs((ub-lb)/fhat), and max(ub-fhat,fhat-lb)) (that is, the estimated value, the
55
+ lower and upper bounds of the confidence interval, the width of the confidence interval, the relative
56
+ width of the confidence interval, and the symmetrized error bar of the confidence interval).'''
57
+
58
+ data = np.asanyarray(data)
59
+
60
+ fhat = estimator(data, *args, **kwargs)
61
+
62
+ try:
63
+ estimator_shape = fhat.shape
64
+ except AttributeError:
65
+ estimator_shape = ()
66
+
67
+ try:
68
+ estimator_dtype = fhat.dtype
69
+ except AttributeError:
70
+ estimator_dtype = type(fhat)
71
+
72
+ dlen = len(data)
73
+ n_sets = n_sets or get_bssize(alpha)
74
+
75
+ f_synth = np.empty((n_sets,) + estimator_shape, dtype=estimator_dtype)
76
+
77
+ rng = Generator(MT19937())
78
+
79
+ for i in range(0, n_sets):
80
+ indices = rng.integers(dlen, size=(dlen,))
81
+ f_synth[i] = estimator(data[indices], *args, **kwargs)
82
+
83
+ f_synth_sorted = sort(f_synth)
84
+ lbi = int(math.floor(n_sets * alpha / 2))
85
+ ubi = int(math.ceil(n_sets * (1 - alpha / 2)))
86
+ lb = f_synth_sorted[lbi]
87
+ ub = f_synth_sorted[ubi]
88
+
89
+ try:
90
+ if extended_output:
91
+ return (fhat, lb, ub, ub - lb, abs((ub - lb) / fhat) if fhat else 0, max(ub - fhat, fhat - lb))
92
+ else:
93
+ return (fhat, lb, ub)
94
+ finally:
95
+ # Do a little explicit memory management
96
+ del f_synth, f_synth_sorted