westpa 2022.12__cp313-cp313-macosx_10_13_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- westpa/__init__.py +14 -0
- westpa/_version.py +21 -0
- westpa/analysis/__init__.py +5 -0
- westpa/analysis/core.py +746 -0
- westpa/analysis/statistics.py +27 -0
- westpa/analysis/trajectories.py +360 -0
- westpa/cli/__init__.py +0 -0
- westpa/cli/core/__init__.py +0 -0
- westpa/cli/core/w_fork.py +152 -0
- westpa/cli/core/w_init.py +230 -0
- westpa/cli/core/w_run.py +77 -0
- westpa/cli/core/w_states.py +212 -0
- westpa/cli/core/w_succ.py +99 -0
- westpa/cli/core/w_truncate.py +68 -0
- westpa/cli/tools/__init__.py +0 -0
- westpa/cli/tools/ploterr.py +506 -0
- westpa/cli/tools/plothist.py +706 -0
- westpa/cli/tools/w_assign.py +596 -0
- westpa/cli/tools/w_bins.py +166 -0
- westpa/cli/tools/w_crawl.py +119 -0
- westpa/cli/tools/w_direct.py +547 -0
- westpa/cli/tools/w_dumpsegs.py +94 -0
- westpa/cli/tools/w_eddist.py +506 -0
- westpa/cli/tools/w_fluxanl.py +376 -0
- westpa/cli/tools/w_ipa.py +833 -0
- westpa/cli/tools/w_kinavg.py +127 -0
- westpa/cli/tools/w_kinetics.py +96 -0
- westpa/cli/tools/w_multi_west.py +414 -0
- westpa/cli/tools/w_ntop.py +213 -0
- westpa/cli/tools/w_pdist.py +515 -0
- westpa/cli/tools/w_postanalysis_matrix.py +82 -0
- westpa/cli/tools/w_postanalysis_reweight.py +53 -0
- westpa/cli/tools/w_red.py +491 -0
- westpa/cli/tools/w_reweight.py +780 -0
- westpa/cli/tools/w_select.py +226 -0
- westpa/cli/tools/w_stateprobs.py +111 -0
- westpa/cli/tools/w_trace.py +599 -0
- westpa/core/__init__.py +0 -0
- westpa/core/_rc.py +673 -0
- westpa/core/binning/__init__.py +55 -0
- westpa/core/binning/_assign.cpython-313-darwin.so +0 -0
- westpa/core/binning/assign.py +455 -0
- westpa/core/binning/binless.py +96 -0
- westpa/core/binning/binless_driver.py +54 -0
- westpa/core/binning/binless_manager.py +190 -0
- westpa/core/binning/bins.py +47 -0
- westpa/core/binning/mab.py +506 -0
- westpa/core/binning/mab_driver.py +54 -0
- westpa/core/binning/mab_manager.py +198 -0
- westpa/core/data_manager.py +1694 -0
- westpa/core/extloader.py +74 -0
- westpa/core/h5io.py +995 -0
- westpa/core/kinetics/__init__.py +24 -0
- westpa/core/kinetics/_kinetics.cpython-313-darwin.so +0 -0
- westpa/core/kinetics/events.py +147 -0
- westpa/core/kinetics/matrates.py +156 -0
- westpa/core/kinetics/rate_averaging.py +266 -0
- westpa/core/progress.py +218 -0
- westpa/core/propagators/__init__.py +54 -0
- westpa/core/propagators/executable.py +719 -0
- westpa/core/reweight/__init__.py +14 -0
- westpa/core/reweight/_reweight.cpython-313-darwin.so +0 -0
- westpa/core/reweight/matrix.py +126 -0
- westpa/core/segment.py +119 -0
- westpa/core/sim_manager.py +835 -0
- westpa/core/states.py +359 -0
- westpa/core/systems.py +93 -0
- westpa/core/textio.py +74 -0
- westpa/core/trajectory.py +330 -0
- westpa/core/we_driver.py +910 -0
- westpa/core/wm_ops.py +43 -0
- westpa/core/yamlcfg.py +391 -0
- westpa/fasthist/__init__.py +34 -0
- westpa/fasthist/_fasthist.cpython-313-darwin.so +0 -0
- westpa/mclib/__init__.py +271 -0
- westpa/mclib/__main__.py +28 -0
- westpa/mclib/_mclib.cpython-313-darwin.so +0 -0
- westpa/oldtools/__init__.py +4 -0
- westpa/oldtools/aframe/__init__.py +35 -0
- westpa/oldtools/aframe/atool.py +75 -0
- westpa/oldtools/aframe/base_mixin.py +26 -0
- westpa/oldtools/aframe/binning.py +178 -0
- westpa/oldtools/aframe/data_reader.py +560 -0
- westpa/oldtools/aframe/iter_range.py +200 -0
- westpa/oldtools/aframe/kinetics.py +117 -0
- westpa/oldtools/aframe/mcbs.py +153 -0
- westpa/oldtools/aframe/output.py +39 -0
- westpa/oldtools/aframe/plotting.py +90 -0
- westpa/oldtools/aframe/trajwalker.py +126 -0
- westpa/oldtools/aframe/transitions.py +469 -0
- westpa/oldtools/cmds/__init__.py +0 -0
- westpa/oldtools/cmds/w_ttimes.py +361 -0
- westpa/oldtools/files.py +34 -0
- westpa/oldtools/miscfn.py +23 -0
- westpa/oldtools/stats/__init__.py +4 -0
- westpa/oldtools/stats/accumulator.py +35 -0
- westpa/oldtools/stats/edfs.py +129 -0
- westpa/oldtools/stats/mcbs.py +96 -0
- westpa/tools/__init__.py +33 -0
- westpa/tools/binning.py +472 -0
- westpa/tools/core.py +340 -0
- westpa/tools/data_reader.py +159 -0
- westpa/tools/dtypes.py +31 -0
- westpa/tools/iter_range.py +198 -0
- westpa/tools/kinetics_tool.py +340 -0
- westpa/tools/plot.py +283 -0
- westpa/tools/progress.py +17 -0
- westpa/tools/selected_segs.py +154 -0
- westpa/tools/wipi.py +751 -0
- westpa/trajtree/__init__.py +4 -0
- westpa/trajtree/_trajtree.cpython-313-darwin.so +0 -0
- westpa/trajtree/trajtree.py +117 -0
- westpa/westext/__init__.py +0 -0
- westpa/westext/adaptvoronoi/__init__.py +3 -0
- westpa/westext/adaptvoronoi/adaptVor_driver.py +214 -0
- westpa/westext/hamsm_restarting/__init__.py +3 -0
- westpa/westext/hamsm_restarting/example_overrides.py +35 -0
- westpa/westext/hamsm_restarting/restart_driver.py +1165 -0
- westpa/westext/stringmethod/__init__.py +11 -0
- westpa/westext/stringmethod/fourier_fitting.py +69 -0
- westpa/westext/stringmethod/string_driver.py +253 -0
- westpa/westext/stringmethod/string_method.py +306 -0
- westpa/westext/weed/BinCluster.py +180 -0
- westpa/westext/weed/ProbAdjustEquil.py +100 -0
- westpa/westext/weed/UncertMath.py +247 -0
- westpa/westext/weed/__init__.py +10 -0
- westpa/westext/weed/weed_driver.py +192 -0
- westpa/westext/wess/ProbAdjust.py +101 -0
- westpa/westext/wess/__init__.py +6 -0
- westpa/westext/wess/wess_driver.py +217 -0
- westpa/work_managers/__init__.py +57 -0
- westpa/work_managers/core.py +396 -0
- westpa/work_managers/environment.py +134 -0
- westpa/work_managers/mpi.py +318 -0
- westpa/work_managers/processes.py +187 -0
- westpa/work_managers/serial.py +28 -0
- westpa/work_managers/threads.py +79 -0
- westpa/work_managers/zeromq/__init__.py +20 -0
- westpa/work_managers/zeromq/core.py +641 -0
- westpa/work_managers/zeromq/node.py +131 -0
- westpa/work_managers/zeromq/work_manager.py +526 -0
- westpa/work_managers/zeromq/worker.py +320 -0
- westpa-2022.12.dist-info/AUTHORS +22 -0
- westpa-2022.12.dist-info/LICENSE +21 -0
- westpa-2022.12.dist-info/METADATA +193 -0
- westpa-2022.12.dist-info/RECORD +149 -0
- westpa-2022.12.dist-info/WHEEL +6 -0
- westpa-2022.12.dist-info/entry_points.txt +29 -0
- westpa-2022.12.dist-info/top_level.txt +1 -0
westpa/oldtools/aframe/data_reader.py
@@ -0,0 +1,560 @@
+import logging
+
+import h5py
+import numpy as np
+
+import westpa
+from westpa.core.segment import Segment
+from westpa.oldtools.aframe import AnalysisMixin
+from westpa.oldtools.miscfn import parse_int_list
+
+log = logging.getLogger(__name__)
+
+
+class WESTDataReaderMixin(AnalysisMixin):
+    '''A mixin for analysis requiring access to the HDF5 files generated during a WEST run.'''
+
+    def __init__(self):
+        super().__init__()
+
+        self.data_manager = None
+        self.west_h5name = None
+
+        # Whether pcoord caching is active
+        self.__cache_pcoords = False
+
+        # Cached items
+        self.__c_summary = None
+        self.__c_iter_groups = dict()
+        self.__c_seg_id_ranges = dict()
+        self.__c_seg_indices = dict()
+        self.__c_wtg_parent_arrays = dict()
+        self.__c_parent_arrays = dict()
+        self.__c_pcoord_arrays = dict()
+        self.__c_pcoord_datasets = dict()
+
+    def add_args(self, parser, upcall=True):
+        if upcall:
+            try:
+                upfunc = super().add_args
+            except AttributeError:
+                pass
+            else:
+                upfunc(parser)
+
+        group = parser.add_argument_group('WEST input data options')
+        group.add_argument(
+            '-W',
+            '--west-data',
+            dest='west_h5name',
+            metavar='WEST_H5FILE',
+            help='''Take WEST data from WEST_H5FILE (default: read from the HDF5 file specified in west.cfg).''',
+        )
+
+    def process_args(self, args, upcall=True):
+        if args.west_h5name:
+            self.west_h5name = args.west_h5name
+        else:
+            westpa.rc.config.require(['west', 'data', 'west_data_file'])
+            self.west_h5name = westpa.rc.config.get_path(['west', 'data', 'west_data_file'])
+
+        westpa.rc.pstatus("Using WEST data from '{}'".format(self.west_h5name))
+
+        self.data_manager = westpa.rc.get_data_manager()
+        self.data_manager.we_h5filename = self.west_h5name
+        self.data_manager.open_backing(mode='r')
+
+        if upcall:
+            try:
+                upfunc = super().process_args
+            except AttributeError:
+                pass
+            else:
+                upfunc(args)
+
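The ``upcall`` machinery above lets several mixins cooperatively extend one argument parser: each ``add_args``/``process_args`` chains up the MRO via ``super()``, so every mixin contributes its argument group exactly once. A minimal sketch of driving it, assuming an initialized WESTPA runtime (the ``MyTool`` class and argument values are illustrative, not part of the package):

    import argparse

    class MyTool(WESTDataReaderMixin):      # hypothetical tool built on the mixin
        pass

    tool = MyTool()
    parser = argparse.ArgumentParser()
    tool.add_args(parser)                   # adds the 'WEST input data options' group
    args = parser.parse_args(['-W', 'west.h5'])
    tool.process_args(args)                 # opens west.h5 read-only via the data manager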
+    def clear_run_cache(self):
+        del self.__c_summary
+        del self.__c_iter_groups, self.__c_seg_id_ranges, self.__c_seg_indices, self.__c_parent_arrays, self.__c_wtg_parent_arrays
+        del self.__c_pcoord_arrays, self.__c_pcoord_datasets
+
+        self.__c_summary = None
+        self.__c_iter_groups = dict()
+        self.__c_seg_id_ranges = dict()
+        self.__c_seg_indices = dict()
+        self.__c_parent_arrays = dict()
+        self.__c_wtg_parent_arrays = dict()
+        self.__c_pcoord_arrays = dict()
+        self.__c_pcoord_datasets = dict()
+
+    @property
+    def cache_pcoords(self):
+        '''Whether or not to cache progress coordinate data. While caching this data
+        can significantly speed up some analysis operations, it requires
+        copious RAM.
+
+        Setting this to False when it was formerly True will release any cached data.
+        '''
+        return self.__cache_pcoords
+
+    @cache_pcoords.setter
+    def cache_pcoords(self, cache):
+        self.__cache_pcoords = cache
+
+        if not cache:
+            del self.__c_pcoord_arrays
+            self.__c_pcoord_arrays = dict()
+
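Because the setter releases the cached arrays when flipped back to False, a memory-hungry pass can be bracketed explicitly. A short sketch (iteration range illustrative):

    tool.cache_pcoords = True              # subsequent pcoord reads are cached per iteration
    for n_iter in range(1, 101):
        segs = tool.get_segments(n_iter)   # repeated access hits the in-memory cache
    tool.cache_pcoords = False             # releases the cached pcoord arrays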
+    def get_summary_table(self):
+        if self.__c_summary is None:
+            self.__c_summary = self.data_manager.we_h5file['/summary'][...]
+        return self.__c_summary
+
+    def get_iter_group(self, n_iter):
+        '''Return the HDF5 group corresponding to ``n_iter``'''
+        try:
+            return self.__c_iter_groups[n_iter]
+        except KeyError:
+            iter_group = self.__c_iter_groups[n_iter] = self.data_manager.get_iter_group(n_iter)
+            return iter_group
+
+    def get_segments(self, n_iter, include_pcoords=True):
+        '''Return all segments present in iteration n_iter'''
+        return self.get_segments_by_id(n_iter, self.get_seg_ids(n_iter, None), include_pcoords)
+
+    def get_segments_by_id(self, n_iter, seg_ids, include_pcoords=True):
+        '''Get segments from the data manager, employing caching where possible'''
+
+        if len(seg_ids) == 0:
+            return []
+
+        seg_index = self.get_seg_index(n_iter)
+        all_wtg_parent_ids = self.get_wtg_parent_array(n_iter)
+
+        segments = []
+
+        if include_pcoords:
+            pcoords = self.get_pcoords(n_iter, seg_ids)
+
+        for isegid, seg_id in enumerate(seg_ids):
+            row = seg_index[seg_id]
+            parents_offset = row['wtg_offset']
+            n_parents = row['wtg_n_parents']
+            segment = Segment(
+                seg_id=seg_id,
+                n_iter=n_iter,
+                status=row['status'],
+                endpoint_type=row['endpoint_type'],
+                walltime=row['walltime'],
+                cputime=row['cputime'],
+                weight=row['weight'],
+            )
+            if include_pcoords:
+                segment.pcoord = pcoords[isegid]
+
+            parent_ids = all_wtg_parent_ids[parents_offset : parents_offset + n_parents]
+            segment.wtg_parent_ids = {int(parent_id) for parent_id in parent_ids}
+            segment.parent_id = int(parent_ids[0])
+            segments.append(segment)
+
+        return segments
+
+    def get_children(self, segment, include_pcoords=True):
+        parents = self.get_parent_array(segment.n_iter + 1)
+        seg_ids = self.get_seg_ids(segment.n_iter + 1, parents == segment.seg_id)
+        return self.get_segments_by_id(segment.n_iter + 1, seg_ids, include_pcoords)
+
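Together these methods support walking the resampling tree forward in time: ``get_children`` matches a segment's ``seg_id`` against the next iteration's ``parent_id`` column. A sketch, assuming the ``tool`` instance from above:

    for seg in tool.get_segments(10, include_pcoords=False):
        children = tool.get_children(seg, include_pcoords=False)
        print(seg.seg_id, seg.weight, sorted(c.seg_id for c in children))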
+    def get_seg_index(self, n_iter):
+        try:
+            return self.__c_seg_indices[n_iter]
+        except KeyError:
+            seg_index = self.__c_seg_indices[n_iter] = self.get_iter_group(n_iter)['seg_index'][...]
+            return seg_index
+
+    def get_wtg_parent_array(self, n_iter):
+        try:
+            return self.__c_wtg_parent_arrays[n_iter]
+        except KeyError:
+            parent_array = self.__c_wtg_parent_arrays[n_iter] = self.get_iter_group(n_iter)['wtgraph'][...]
+            return parent_array
+
+    def get_parent_array(self, n_iter):
+        try:
+            return self.__c_parent_arrays[n_iter]
+        except KeyError:
+            parent_array = self.get_seg_index(n_iter)['parent_id']
+            self.__c_parent_arrays[n_iter] = parent_array
+            return parent_array
+
+    def get_pcoord_array(self, n_iter):
+        try:
+            return self.__c_pcoord_arrays[n_iter]
+        except KeyError:
+            pcoords = self.__c_pcoord_arrays[n_iter] = self.get_iter_group(n_iter)['pcoord'][...]
+            return pcoords
+
+    def get_pcoord_dataset(self, n_iter):
+        try:
+            return self.__c_pcoord_datasets[n_iter]
+        except KeyError:
+            pcoord_ds = self.__c_pcoord_datasets[n_iter] = self.get_iter_group(n_iter)['pcoord']
+            return pcoord_ds
+
+    def get_pcoords(self, n_iter, seg_ids):
+        if self.__cache_pcoords:
+            pcarray = self.get_pcoord_array(n_iter)
+            return [pcarray[seg_id, ...] for seg_id in seg_ids]
+        else:
+            return self.get_pcoord_dataset(n_iter)[list(seg_ids), ...]
+
+    def get_seg_ids(self, n_iter, bool_array=None):
+        try:
+            all_ids = self.__c_seg_id_ranges[n_iter]
+        except KeyError:
+            all_ids = self.__c_seg_id_ranges[n_iter] = np.arange(0, len(self.get_seg_index(n_iter)), dtype=np.uint32)
+
+        if bool_array is None:
+            return all_ids
+        else:
+            seg_ids = all_ids[bool_array]
+            try:
+                if len(seg_ids) == 0:
+                    return []
+            except TypeError:
+                # Not iterable, for some bizarre reason
+                return [seg_ids]
+            else:
+                return seg_ids
+
+    def get_created_seg_ids(self, n_iter):
+        '''Return a list of seg_ids corresponding to segments which were created for the given iteration (are not
+        continuations).'''
+
+        # Created segments have parent_id < 0
+        parent_ids = self.get_parent_array(n_iter)
+        return self.get_seg_ids(n_iter, parent_ids < 0)
+
+    def max_iter_segs_in_range(self, first_iter, last_iter):
+        '''Return the maximum number of segments present in any iteration in the range selected'''
+        n_particles = self.get_summary_table()['n_particles']
+        return n_particles[first_iter - 1 : last_iter].max()
+
+    def total_segs_in_range(self, first_iter, last_iter):
+        '''Return the total number of segments present in all iterations in the range selected'''
+        n_particles = self.get_summary_table()['n_particles']
+        return n_particles[first_iter - 1 : last_iter].sum()
+
+    def get_pcoord_len(self, n_iter):
+        '''Get the length of the progress coordinate array for the given iteration.'''
+        pcoord_ds = self.get_pcoord_dataset(n_iter)
+        return pcoord_ds.shape[1]
+
+    def get_total_time(self, first_iter=None, last_iter=None, dt=None):
+        '''Return the total amount of simulation time spanned between first_iter and last_iter (inclusive).'''
+        first_iter = first_iter or self.first_iter
+        last_iter = last_iter or self.last_iter
+        dt = dt or getattr(self, 'dt', 1.0)
+
+        total_len = 0
+        for n_iter in range(first_iter, last_iter + 1):
+            total_len += self.get_pcoord_len(n_iter) - 1
+        return total_len * dt
+
+
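The ``- 1`` in the loop reflects that each iteration's pcoord array stores both the initial and final frames of the iteration, so consecutive iterations share a time point. As a worked check with illustrative numbers: 50 iterations, each with ``get_pcoord_len(n_iter) == 101``, and ``dt == 0.5`` give 50 * (101 - 1) * 0.5 = 2500.0 time units.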
+class ExtDataReaderMixin(AnalysisMixin):
+    '''An external data reader, primarily designed for reading brute force data, but also suitable
+    for any auxiliary datasets required for analysis.
+    '''
+
+    default_chunksize = 8192
+
+    def __init__(self):
+        super().__init__()
+
+        self.ext_input_nargs = '+'
+        self.ext_input_filenames = []
+        self.ext_input_chunksize = self.default_chunksize
+        self.ext_input_usecols = None
+        self.ext_input_comment_regexp = None
+        self.ext_input_sep_regexp = None
+
+    def add_args(self, parser, upcall=True):
+        if upcall:
+            try:
+                upfunc = super().add_args
+            except AttributeError:
+                pass
+            else:
+                upfunc(parser)
+
+        input_options = parser.add_argument_group('external data input options')
+        input_options.add_argument(
+            'datafiles',
+            nargs=self.ext_input_nargs,
+            metavar='DATAFILE',
+            help='''Data file(s) to analyze, in either text or NumPy (.npy or .npz) format.
+            Uncompressed NumPy files will be memory-mapped, allowing analysis of data larger than
+            available RAM (though not larger than the available address space).''',
+        )
+        input_options.add_argument(
+            '--usecols',
+            dest='usecols',
+            metavar='COLUMNS',
+            type=parse_int_list,
+            help='''Use only the given COLUMNS from the input file(s), e.g. "0", "0,1",
+            "0:5,7,9:10".''',
+        )
+        input_options.add_argument(
+            '--chunksize',
+            dest='chunksize',
+            type=int,
+            default=self.default_chunksize,
+            help='''Process input data in blocks of size CHUNKSIZE. This will only reduce memory
+            requirements when using uncompressed NumPy (.npy) format input. (Default: %(default)d.)''',
+        )
+
+    def process_args(self, args, upcall=True):
+        if args.usecols:
+            westpa.rc.pstatus('Using only the following columns from external input: {!s}'.format(args.usecols))
+            self.ext_input_usecols = args.usecols
+        else:
+            self.ext_input_usecols = None
+
+        self.ext_input_filenames = args.datafiles
+        self.ext_input_chunksize = args.chunksize or self.default_chunksize
+
+        if upcall:
+            try:
+                upfunc = super().process_args
+            except AttributeError:
+                pass
+            else:
+                upfunc(args)
+
+    def is_npy(self, filename):
+        '''Return True if ``filename`` begins with the NumPy .npy magic prefix.'''
+        with open(filename, 'rb') as fileobj:
+            first_bytes = fileobj.read(len(np.lib.format.MAGIC_PREFIX))
+        return first_bytes == np.lib.format.MAGIC_PREFIX
+
+    def load_npy_or_text(self, filename):
+        '''Load an array from an existing .npy file, or read a text file and
+        convert to a NumPy array. In either case, return a NumPy array. If an
+        .npy file is found, memory-map it read-only. If the specified
+        file does not contain a saved NumPy array, attempt to read the file using
+        numpy.loadtxt(filename).'''
+
+        if self.is_npy(filename):
+            return np.load(filename, mmap_mode='r')
+        else:
+            return np.loadtxt(filename)
+
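A quick sketch of the sniff-and-load behavior (file name and data are illustrative):

    import numpy as np

    np.save('pcoords.npy', np.random.rand(1000, 2))   # illustrative input file
    reader = ExtDataReaderMixin()
    arr = reader.load_npy_or_text('pcoords.npy')
    print(arr.shape)                                  # (1000, 2); arr is a read-only np.memmap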
+    def text_to_h5dataset(self, fileobj, group, dsname, dtype=np.float64, skiprows=0, usecols=None, chunksize=None):
+        '''Read text-format data from the given filename or file-like object ``fileobj`` and write to a newly-created dataset
+        called ``dsname`` in the HDF5 group ``group``. The data is stored as type ``dtype``. By default, the shape is
+        taken as (number of lines, number of columns); columns can be omitted by specifying a list for ``usecols``,
+        and lines can be skipped by using ``skiprows``. Data is read in chunks of ``chunksize`` rows.'''
+
+        try:
+            fileobj.readline
+        except AttributeError:
+            fileobj = open(fileobj, 'rt')
+
+        usecols = usecols or self.ext_input_usecols
+        chunksize = chunksize or self.ext_input_chunksize
+
+        linenumber = 0
+        for iskip in range(skiprows or 0):
+            fileobj.readline()
+            linenumber += 1
+
+        nrows = 0
+        irow = 0
+        ncols_input = None  # number of columns in input
+        ncols_store = None  # number of columns to store
+        databuffer = None
+        dataset = None
+
+        re_split_comments = self.ext_input_comment_regexp
+        re_split_fields = self.ext_input_sep_regexp
+
+        for line in fileobj:
+            linenumber += 1
+
+            # Discard comments and extraneous whitespace
+            if re_split_comments is not None:
+                record_text = re_split_comments.split(line, 1)[0].strip()
+            else:
+                record_text = line.split('#', 1)[0].strip()
+
+            if not record_text:
+                continue
+
+            if re_split_fields is not None:
+                fields = re_split_fields.split(record_text)
+            else:
+                fields = record_text.split()
+
+            # Check that the input width hasn't changed (blank lines excluded)
+            if not ncols_input:
+                ncols_input = len(fields)
+            elif len(fields) != ncols_input:
+                raise ValueError('expected {:d} columns at line {:d}, but found {:d}'.format(ncols_input, linenumber, len(fields)))
+
+            # If this is the first time through the loop, allocate temporary storage
+            if not ncols_store:
+                ncols_store = len(usecols) if usecols else ncols_input
+                databuffer = np.empty((chunksize, ncols_store), dtype)
+                dataset = group.create_dataset(
+                    dsname, shape=(0, ncols_store), maxshape=(None, ncols_store), chunks=(chunksize, ncols_store), dtype=dtype
+                )
+
+            if usecols:
+                for ifield, iifield in enumerate(usecols):
+                    databuffer[irow, ifield] = dtype(fields[iifield])
+            else:
+                for ifield, field in enumerate(fields):
+                    databuffer[irow, ifield] = dtype(field)
+
+            nrows += 1
+            irow += 1
+
+            # Flush to HDF5 when the buffer is full
+            if irow == chunksize:
+                westpa.rc.pstatus('\r Read {:d} rows'.format(nrows), end='')
+                westpa.rc.pflush()
+                dataset.resize((nrows, ncols_store))
+                dataset[-irow:] = databuffer
+                irow = 0
+
+        # Flush the last partial buffer
+        if irow > 0:
+            dataset.resize((nrows, ncols_store))
+            dataset[-irow:] = databuffer[:irow]
+        westpa.rc.pstatus('\r Read {:d} rows'.format(nrows))
+        westpa.rc.pflush()
+
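A sketch of pulling a whitespace-delimited text file into an HDF5 group with this helper; the file, group, and dataset names are illustrative:

    import h5py

    reader = ExtDataReaderMixin()
    with h5py.File('analysis.h5', 'a') as h5file:
        grp = h5file.require_group('bf_input')
        # store columns 0 and 2, skipping a one-line header, 8192 rows per flush
        reader.text_to_h5dataset('dihedrals.dat', grp, 'phi_psi', skiprows=1, usecols=[0, 2])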
+    def npy_to_h5dataset(self, array, group, dsname, usecols=None, chunksize=None):
+        '''Store the given array in a newly-created dataset named ``dsname`` in the HDF5 group
+        ``group``, optionally storing only a subset of columns. Data is written ``chunksize`` rows at a time,
+        allowing very large memory-mapped arrays to be copied.'''
+
+        usecols = usecols or self.ext_input_usecols
+        chunksize = chunksize or self.ext_input_chunksize
+
+        if usecols:
+            shape = (len(array),) + array[0][usecols].shape
+        else:
+            shape = array.shape
+
+        if len(shape) == 1:
+            shape = shape + (1,)
+        maxlen = len(array)
+        mw = len(str(maxlen))
+        dataset = group.create_dataset(dsname, shape=shape, dtype=array.dtype)
+
+        if usecols:
+            for istart in range(0, maxlen, chunksize):
+                iend = min(istart + chunksize, maxlen)
+                dataset[istart:iend] = array[istart:iend, usecols]
+                westpa.rc.pstatus('\r Read {:{mw}d}/{:>{mw}d} rows'.format(iend, maxlen, mw=mw), end='')
+                westpa.rc.pflush()
+        else:
+            for istart in range(0, maxlen, chunksize):
+                iend = min(istart + chunksize, maxlen)
+                dataset[istart:iend] = array[istart:iend]
+                westpa.rc.pstatus('\r Read {:{mw}d}/{:>{mw}d} rows'.format(iend, maxlen, mw=mw), end='')
+                westpa.rc.pflush()
+        westpa.rc.pstatus()
+
+
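Since uncompressed .npy input is memory-mapped by ``load_npy_or_text``, the chunked copy above lets arrays far larger than RAM be moved into HDF5. A sketch continuing the previous example (file names illustrative):

    arr = reader.load_npy_or_text('bf_traj.npy')    # np.memmap for .npy input
    with h5py.File('analysis.h5', 'a') as h5file:
        grp = h5file.require_group('bf_input')
        reader.npy_to_h5dataset(arr, grp, 'pcoord', chunksize=65536)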
+class BFDataManager(AnalysisMixin):
+    '''A class to manage brute force trajectory data. The primary purpose is to read in and
+    manage brute force progress coordinate data for one or more trajectories. The trajectories need not
+    be the same length, but they do need to have the same time spacing for progress coordinate values.'''
+
+    traj_index_dtype = np.dtype([('pcoord_len', np.uint64), ('source_data', h5py.special_dtype(vlen=str))])
+
+    def __init__(self):
+        super().__init__()
+        self.bf_h5name = None
+        self.bf_h5file = None
+
+    def add_args(self, parser, upcall=True):
+        if upcall:
+            try:
+                upfunc = super().add_args
+            except AttributeError:
+                pass
+            else:
+                upfunc(parser)
+
+        group = parser.add_argument_group('brute force input data options')
+        group.add_argument(
+            '-B',
+            '--bfdata',
+            '--brute-force-data',
+            dest='bf_h5name',
+            metavar='BF_H5FILE',
+            default='bf_system.h5',
+            help='''Brute force data is/will be stored in BF_H5FILE (default: %(default)s).''',
+        )
+
+    def process_args(self, args, upcall=True):
+        self.bf_h5name = args.bf_h5name
+        westpa.rc.pstatus("Using brute force data from '{}'".format(self.bf_h5name))
+
+        if upcall:
+            try:
+                upfunc = super().process_args
+            except AttributeError:
+                pass
+            else:
+                upfunc(args)
+
+    def _get_traj_group_name(self, traj_id):
+        return 'traj_{:09d}'.format(traj_id)
+
+    def update_traj_index(self, traj_id, pcoord_len, source_data):
+        self.bf_h5file['traj_index'][traj_id] = (pcoord_len, source_data)
+
+    def get_traj_group(self, traj_id):
+        return self.bf_h5file[self._get_traj_group_name(traj_id)]
+
+    def create_traj_group(self):
+        new_traj_id = self.get_n_trajs()
+        group = self.bf_h5file.create_group(self._get_traj_group_name(new_traj_id))
+        self.bf_h5file['traj_index'].resize((new_traj_id + 1,))
+        return (new_traj_id, group)
+
+    def get_n_trajs(self):
+        return self.bf_h5file['traj_index'].shape[0]
+
+    def get_traj_len(self, traj_id):
+        return self.bf_h5file['traj_index'][traj_id]['pcoord_len']
+
+    def get_max_traj_len(self):
+        return self.bf_h5file['traj_index']['pcoord_len'].max()
+
+    def get_pcoord_array(self, traj_id):
+        return self.get_traj_group(traj_id)['pcoord'][...]
+
+    def get_pcoord_dataset(self, traj_id):
+        return self.get_traj_group(traj_id)['pcoord']
+
+    def require_bf_h5file(self):
+        if self.bf_h5file is None:
+            assert self.bf_h5name
+            # Open read/write, creating the file if necessary; h5py >= 3 requires an explicit mode
+            self.bf_h5file = h5py.File(self.bf_h5name, 'a')
+            try:
+                self.bf_h5file['traj_index']
+            except KeyError:
+                # A new file; create the trajectory index
+                self.bf_h5file.create_dataset('traj_index', shape=(0,), maxshape=(None,), dtype=self.traj_index_dtype)
+        return self.bf_h5file
+
+    def close_bf_h5file(self):
+        if self.bf_h5file is not None:
+            self.bf_h5file.close()
+            self.bf_h5file = None
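Finally, a sketch of how the ``BFDataManager`` pieces combine to build a brute force progress coordinate store; the file names and data are illustrative:

    import numpy as np

    mgr = BFDataManager()
    mgr.bf_h5name = 'bf_system.h5'
    mgr.require_bf_h5file()                    # opens/creates the file and 'traj_index'

    traj_id, grp = mgr.create_traj_group()     # creates group 'traj_000000000'
    pcoord = np.loadtxt('traj0_pcoord.dat')    # illustrative source data
    grp.create_dataset('pcoord', data=pcoord)
    mgr.update_traj_index(traj_id, len(pcoord), 'traj0_pcoord.dat')

    print(mgr.get_n_trajs(), mgr.get_max_traj_len())
    mgr.close_bf_h5file()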