westpa 2022.12__cp313-cp313-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of westpa might be problematic.

Files changed (149)
  1. westpa/__init__.py +14 -0
  2. westpa/_version.py +21 -0
  3. westpa/analysis/__init__.py +5 -0
  4. westpa/analysis/core.py +746 -0
  5. westpa/analysis/statistics.py +27 -0
  6. westpa/analysis/trajectories.py +360 -0
  7. westpa/cli/__init__.py +0 -0
  8. westpa/cli/core/__init__.py +0 -0
  9. westpa/cli/core/w_fork.py +152 -0
  10. westpa/cli/core/w_init.py +230 -0
  11. westpa/cli/core/w_run.py +77 -0
  12. westpa/cli/core/w_states.py +212 -0
  13. westpa/cli/core/w_succ.py +99 -0
  14. westpa/cli/core/w_truncate.py +68 -0
  15. westpa/cli/tools/__init__.py +0 -0
  16. westpa/cli/tools/ploterr.py +506 -0
  17. westpa/cli/tools/plothist.py +706 -0
  18. westpa/cli/tools/w_assign.py +596 -0
  19. westpa/cli/tools/w_bins.py +166 -0
  20. westpa/cli/tools/w_crawl.py +119 -0
  21. westpa/cli/tools/w_direct.py +547 -0
  22. westpa/cli/tools/w_dumpsegs.py +94 -0
  23. westpa/cli/tools/w_eddist.py +506 -0
  24. westpa/cli/tools/w_fluxanl.py +376 -0
  25. westpa/cli/tools/w_ipa.py +833 -0
  26. westpa/cli/tools/w_kinavg.py +127 -0
  27. westpa/cli/tools/w_kinetics.py +96 -0
  28. westpa/cli/tools/w_multi_west.py +414 -0
  29. westpa/cli/tools/w_ntop.py +213 -0
  30. westpa/cli/tools/w_pdist.py +515 -0
  31. westpa/cli/tools/w_postanalysis_matrix.py +82 -0
  32. westpa/cli/tools/w_postanalysis_reweight.py +53 -0
  33. westpa/cli/tools/w_red.py +491 -0
  34. westpa/cli/tools/w_reweight.py +780 -0
  35. westpa/cli/tools/w_select.py +226 -0
  36. westpa/cli/tools/w_stateprobs.py +111 -0
  37. westpa/cli/tools/w_trace.py +599 -0
  38. westpa/core/__init__.py +0 -0
  39. westpa/core/_rc.py +673 -0
  40. westpa/core/binning/__init__.py +55 -0
  41. westpa/core/binning/_assign.cpython-313-darwin.so +0 -0
  42. westpa/core/binning/assign.py +455 -0
  43. westpa/core/binning/binless.py +96 -0
  44. westpa/core/binning/binless_driver.py +54 -0
  45. westpa/core/binning/binless_manager.py +190 -0
  46. westpa/core/binning/bins.py +47 -0
  47. westpa/core/binning/mab.py +506 -0
  48. westpa/core/binning/mab_driver.py +54 -0
  49. westpa/core/binning/mab_manager.py +198 -0
  50. westpa/core/data_manager.py +1694 -0
  51. westpa/core/extloader.py +74 -0
  52. westpa/core/h5io.py +995 -0
  53. westpa/core/kinetics/__init__.py +24 -0
  54. westpa/core/kinetics/_kinetics.cpython-313-darwin.so +0 -0
  55. westpa/core/kinetics/events.py +147 -0
  56. westpa/core/kinetics/matrates.py +156 -0
  57. westpa/core/kinetics/rate_averaging.py +266 -0
  58. westpa/core/progress.py +218 -0
  59. westpa/core/propagators/__init__.py +54 -0
  60. westpa/core/propagators/executable.py +719 -0
  61. westpa/core/reweight/__init__.py +14 -0
  62. westpa/core/reweight/_reweight.cpython-313-darwin.so +0 -0
  63. westpa/core/reweight/matrix.py +126 -0
  64. westpa/core/segment.py +119 -0
  65. westpa/core/sim_manager.py +835 -0
  66. westpa/core/states.py +359 -0
  67. westpa/core/systems.py +93 -0
  68. westpa/core/textio.py +74 -0
  69. westpa/core/trajectory.py +330 -0
  70. westpa/core/we_driver.py +910 -0
  71. westpa/core/wm_ops.py +43 -0
  72. westpa/core/yamlcfg.py +391 -0
  73. westpa/fasthist/__init__.py +34 -0
  74. westpa/fasthist/_fasthist.cpython-313-darwin.so +0 -0
  75. westpa/mclib/__init__.py +271 -0
  76. westpa/mclib/__main__.py +28 -0
  77. westpa/mclib/_mclib.cpython-313-darwin.so +0 -0
  78. westpa/oldtools/__init__.py +4 -0
  79. westpa/oldtools/aframe/__init__.py +35 -0
  80. westpa/oldtools/aframe/atool.py +75 -0
  81. westpa/oldtools/aframe/base_mixin.py +26 -0
  82. westpa/oldtools/aframe/binning.py +178 -0
  83. westpa/oldtools/aframe/data_reader.py +560 -0
  84. westpa/oldtools/aframe/iter_range.py +200 -0
  85. westpa/oldtools/aframe/kinetics.py +117 -0
  86. westpa/oldtools/aframe/mcbs.py +153 -0
  87. westpa/oldtools/aframe/output.py +39 -0
  88. westpa/oldtools/aframe/plotting.py +90 -0
  89. westpa/oldtools/aframe/trajwalker.py +126 -0
  90. westpa/oldtools/aframe/transitions.py +469 -0
  91. westpa/oldtools/cmds/__init__.py +0 -0
  92. westpa/oldtools/cmds/w_ttimes.py +361 -0
  93. westpa/oldtools/files.py +34 -0
  94. westpa/oldtools/miscfn.py +23 -0
  95. westpa/oldtools/stats/__init__.py +4 -0
  96. westpa/oldtools/stats/accumulator.py +35 -0
  97. westpa/oldtools/stats/edfs.py +129 -0
  98. westpa/oldtools/stats/mcbs.py +96 -0
  99. westpa/tools/__init__.py +33 -0
  100. westpa/tools/binning.py +472 -0
  101. westpa/tools/core.py +340 -0
  102. westpa/tools/data_reader.py +159 -0
  103. westpa/tools/dtypes.py +31 -0
  104. westpa/tools/iter_range.py +198 -0
  105. westpa/tools/kinetics_tool.py +340 -0
  106. westpa/tools/plot.py +283 -0
  107. westpa/tools/progress.py +17 -0
  108. westpa/tools/selected_segs.py +154 -0
  109. westpa/tools/wipi.py +751 -0
  110. westpa/trajtree/__init__.py +4 -0
  111. westpa/trajtree/_trajtree.cpython-313-darwin.so +0 -0
  112. westpa/trajtree/trajtree.py +117 -0
  113. westpa/westext/__init__.py +0 -0
  114. westpa/westext/adaptvoronoi/__init__.py +3 -0
  115. westpa/westext/adaptvoronoi/adaptVor_driver.py +214 -0
  116. westpa/westext/hamsm_restarting/__init__.py +3 -0
  117. westpa/westext/hamsm_restarting/example_overrides.py +35 -0
  118. westpa/westext/hamsm_restarting/restart_driver.py +1165 -0
  119. westpa/westext/stringmethod/__init__.py +11 -0
  120. westpa/westext/stringmethod/fourier_fitting.py +69 -0
  121. westpa/westext/stringmethod/string_driver.py +253 -0
  122. westpa/westext/stringmethod/string_method.py +306 -0
  123. westpa/westext/weed/BinCluster.py +180 -0
  124. westpa/westext/weed/ProbAdjustEquil.py +100 -0
  125. westpa/westext/weed/UncertMath.py +247 -0
  126. westpa/westext/weed/__init__.py +10 -0
  127. westpa/westext/weed/weed_driver.py +192 -0
  128. westpa/westext/wess/ProbAdjust.py +101 -0
  129. westpa/westext/wess/__init__.py +6 -0
  130. westpa/westext/wess/wess_driver.py +217 -0
  131. westpa/work_managers/__init__.py +57 -0
  132. westpa/work_managers/core.py +396 -0
  133. westpa/work_managers/environment.py +134 -0
  134. westpa/work_managers/mpi.py +318 -0
  135. westpa/work_managers/processes.py +187 -0
  136. westpa/work_managers/serial.py +28 -0
  137. westpa/work_managers/threads.py +79 -0
  138. westpa/work_managers/zeromq/__init__.py +20 -0
  139. westpa/work_managers/zeromq/core.py +641 -0
  140. westpa/work_managers/zeromq/node.py +131 -0
  141. westpa/work_managers/zeromq/work_manager.py +526 -0
  142. westpa/work_managers/zeromq/worker.py +320 -0
  143. westpa-2022.12.dist-info/AUTHORS +22 -0
  144. westpa-2022.12.dist-info/LICENSE +21 -0
  145. westpa-2022.12.dist-info/METADATA +193 -0
  146. westpa-2022.12.dist-info/RECORD +149 -0
  147. westpa-2022.12.dist-info/WHEEL +6 -0
  148. westpa-2022.12.dist-info/entry_points.txt +29 -0
  149. westpa-2022.12.dist-info/top_level.txt +1 -0
westpa/oldtools/aframe/data_reader.py
@@ -0,0 +1,560 @@
+ import logging
+
+ import h5py
+ import numpy as np
+
+ import westpa
+ from westpa.core.segment import Segment
+ from westpa.oldtools.aframe import AnalysisMixin
+ from westpa.oldtools.miscfn import parse_int_list
+
+ log = logging.getLogger(__name__)
+
+
+ class WESTDataReaderMixin(AnalysisMixin):
+     '''A mixin for analyses requiring access to the HDF5 files generated during a WEST run.'''
+
+     def __init__(self):
+         super().__init__()
+
+         self.data_manager = None
+         self.west_h5name = None
+
+         # Whether pcoord caching is active
+         self.__cache_pcoords = False
+
+         # Cached items
+         self.__c_summary = None
+         self.__c_iter_groups = dict()
+         self.__c_seg_id_ranges = dict()
+         self.__c_seg_indices = dict()
+         self.__c_wtg_parent_arrays = dict()
+         self.__c_parent_arrays = dict()
+         self.__c_pcoord_arrays = dict()
+         self.__c_pcoord_datasets = dict()
+
+     def add_args(self, parser, upcall=True):
+         if upcall:
+             try:
+                 upcall = super().add_args
+             except AttributeError:
+                 pass
+             else:
+                 upcall(parser)
+
+         group = parser.add_argument_group('WEST input data options')
+         group.add_argument(
+             '-W',
+             '--west-data',
+             dest='west_h5name',
+             metavar='WEST_H5FILE',
+             help='''Take WEST data from WEST_H5FILE (default: read from the HDF5 file specified in west.cfg).''',
+         )
+
+     def process_args(self, args, upcall=True):
+         if args.west_h5name:
+             self.west_h5name = args.west_h5name
+         else:
+             westpa.rc.config.require(['west', 'data', 'west_data_file'])
+             self.west_h5name = westpa.rc.config.get_path(['west', 'data', 'west_data_file'])
+
+         westpa.rc.pstatus("Using WEST data from '{}'".format(self.west_h5name))
+
+         self.data_manager = westpa.rc.get_data_manager()
+         self.data_manager.we_h5filename = self.west_h5name
+         self.data_manager.open_backing(mode='r')
+
+         if upcall:
+             try:
+                 upfunc = super().process_args
+             except AttributeError:
+                 pass
+             else:
+                 upfunc(args)
+
+     def clear_run_cache(self):
+         # Release every cached object, then reinitialize the caches
+         del self.__c_summary
+         del self.__c_iter_groups, self.__c_seg_id_ranges, self.__c_seg_indices
+         del self.__c_wtg_parent_arrays, self.__c_parent_arrays
+         del self.__c_pcoord_arrays, self.__c_pcoord_datasets
+
+         self.__c_summary = None
+         self.__c_iter_groups = dict()
+         self.__c_seg_id_ranges = dict()
+         self.__c_seg_indices = dict()
+         self.__c_parent_arrays = dict()
+         self.__c_wtg_parent_arrays = dict()
+         self.__c_pcoord_arrays = dict()
+         self.__c_pcoord_datasets = dict()
+
+     @property
+     def cache_pcoords(self):
+         '''Whether or not to cache progress coordinate data. While caching this data
+         can significantly speed up some analysis operations, it requires
+         copious RAM.
+
+         Setting this to False when it was formerly True will release any cached data.
+         '''
+         return self.__cache_pcoords
+
+     @cache_pcoords.setter
+     def cache_pcoords(self, cache):
+         self.__cache_pcoords = cache
+
+         if not cache:
+             del self.__c_pcoord_arrays
+             self.__c_pcoord_arrays = dict()
+
+     def get_summary_table(self):
+         if self.__c_summary is None:
+             self.__c_summary = self.data_manager.we_h5file['/summary'][...]
+         return self.__c_summary
+
+     def get_iter_group(self, n_iter):
+         '''Return the HDF5 group corresponding to ``n_iter``'''
+         try:
+             return self.__c_iter_groups[n_iter]
+         except KeyError:
+             # Store the group so that subsequent lookups hit the cache
+             iter_group = self.__c_iter_groups[n_iter] = self.data_manager.get_iter_group(n_iter)
+             return iter_group
+
+     def get_segments(self, n_iter, include_pcoords=True):
+         '''Return all segments present in iteration n_iter'''
+         return self.get_segments_by_id(n_iter, self.get_seg_ids(n_iter, None), include_pcoords)
+
+     def get_segments_by_id(self, n_iter, seg_ids, include_pcoords=True):
+         '''Get segments from the data manager, employing caching where possible'''
+
+         if len(seg_ids) == 0:
+             return []
+
+         seg_index = self.get_seg_index(n_iter)
+         all_wtg_parent_ids = self.get_wtg_parent_array(n_iter)
+
+         segments = []
+
+         if include_pcoords:
+             pcoords = self.get_pcoords(n_iter, seg_ids)
+
+         for isegid, seg_id in enumerate(seg_ids):
+             row = seg_index[seg_id]
+             parents_offset = row['wtg_offset']
+             n_parents = row['wtg_n_parents']
+             segment = Segment(
+                 seg_id=seg_id,
+                 n_iter=n_iter,
+                 status=row['status'],
+                 endpoint_type=row['endpoint_type'],
+                 walltime=row['walltime'],
+                 cputime=row['cputime'],
+                 weight=row['weight'],
+             )
+             if include_pcoords:
+                 segment.pcoord = pcoords[isegid]
+
+             parent_ids = all_wtg_parent_ids[parents_offset : parents_offset + n_parents]
+             segment.wtg_parent_ids = {int(parent_id) for parent_id in parent_ids}
+             segment.parent_id = int(parent_ids[0])
+             segments.append(segment)
+
+         return segments
+
+     def get_children(self, segment, include_pcoords=True):
+         parents = self.get_parent_array(segment.n_iter + 1)
+         seg_ids = self.get_seg_ids(segment.n_iter + 1, parents == segment.seg_id)
+         return self.get_segments_by_id(segment.n_iter + 1, seg_ids, include_pcoords)
+
+     def get_seg_index(self, n_iter):
+         try:
+             return self.__c_seg_indices[n_iter]
+         except KeyError:
+             seg_index = self.__c_seg_indices[n_iter] = self.get_iter_group(n_iter)['seg_index'][...]
+             return seg_index
+
+     def get_wtg_parent_array(self, n_iter):
+         try:
+             return self.__c_wtg_parent_arrays[n_iter]
+         except KeyError:
+             parent_array = self.__c_wtg_parent_arrays[n_iter] = self.get_iter_group(n_iter)['wtgraph'][...]
+             return parent_array
+
+     def get_parent_array(self, n_iter):
+         try:
+             return self.__c_parent_arrays[n_iter]
+         except KeyError:
+             parent_array = self.get_seg_index(n_iter)['parent_id']
+             self.__c_parent_arrays[n_iter] = parent_array
+             return parent_array
+
+     def get_pcoord_array(self, n_iter):
+         try:
+             return self.__c_pcoord_arrays[n_iter]
+         except KeyError:
+             pcoords = self.__c_pcoord_arrays[n_iter] = self.get_iter_group(n_iter)['pcoord'][...]
+             return pcoords
+
+     def get_pcoord_dataset(self, n_iter):
+         try:
+             return self.__c_pcoord_datasets[n_iter]
+         except KeyError:
+             pcoord_ds = self.__c_pcoord_datasets[n_iter] = self.get_iter_group(n_iter)['pcoord']
+             return pcoord_ds
+
+     def get_pcoords(self, n_iter, seg_ids):
+         if self.__cache_pcoords:
+             pcarray = self.get_pcoord_array(n_iter)
+             return [pcarray[seg_id, ...] for seg_id in seg_ids]
+         else:
+             return self.get_pcoord_dataset(n_iter)[list(seg_ids), ...]
+
+     def get_seg_ids(self, n_iter, bool_array=None):
+         try:
+             all_ids = self.__c_seg_id_ranges[n_iter]
+         except KeyError:
+             all_ids = self.__c_seg_id_ranges[n_iter] = np.arange(0, len(self.get_seg_index(n_iter)), dtype=np.uint32)
+
+         if bool_array is None:
+             return all_ids
+         else:
+             seg_ids = all_ids[bool_array]
+             try:
+                 if len(seg_ids) == 0:
+                     return []
+             except TypeError:
+                 # A scalar came back rather than a sequence; wrap it
+                 return [seg_ids]
+             else:
+                 return seg_ids
+
+     def get_created_seg_ids(self, n_iter):
+         '''Return a list of seg_ids corresponding to segments which were created for the given iteration
+         (i.e. which are not continuations).'''
+
+         # Created segments have parent_id < 0
+         parent_ids = self.get_parent_array(n_iter)
+         return self.get_seg_ids(n_iter, parent_ids < 0)
+
+     def max_iter_segs_in_range(self, first_iter, last_iter):
+         '''Return the maximum number of segments present in any iteration in the selected range'''
+         n_particles = self.get_summary_table()['n_particles']
+         return n_particles[first_iter - 1 : last_iter].max()
+
+     def total_segs_in_range(self, first_iter, last_iter):
+         '''Return the total number of segments present in all iterations in the selected range'''
+         n_particles = self.get_summary_table()['n_particles']
+         return n_particles[first_iter - 1 : last_iter].sum()
+
+     def get_pcoord_len(self, n_iter):
+         '''Get the length of the progress coordinate array for the given iteration.'''
+         pcoord_ds = self.get_pcoord_dataset(n_iter)
+         return pcoord_ds.shape[1]
+
+     def get_total_time(self, first_iter=None, last_iter=None, dt=None):
+         '''Return the total amount of simulation time spanned between first_iter and last_iter (inclusive).'''
+         first_iter = first_iter or self.first_iter
+         last_iter = last_iter or self.last_iter
+         dt = dt or getattr(self, 'dt', 1.0)
+
+         total_len = 0
+         for n_iter in range(first_iter, last_iter + 1):
+             total_len += self.get_pcoord_len(n_iter) - 1
+         return total_len * dt
+
+
+ class ExtDataReaderMixin(AnalysisMixin):
+     '''An external data reader, primarily designed for reading brute force data, but also suitable
+     for any auxiliary datasets required for analysis.
+     '''
+
+     default_chunksize = 8192
+
+     def __init__(self):
+         super().__init__()
+
+         self.ext_input_nargs = '+'
+         self.ext_input_filenames = []
+         self.ext_input_chunksize = self.default_chunksize
+         self.ext_input_usecols = None
+         self.ext_input_comment_regexp = None
+         self.ext_input_sep_regexp = None
+
+     def add_args(self, parser, upcall=True):
+         if upcall:
+             try:
+                 upcall = super().add_args
+             except AttributeError:
+                 pass
+             else:
+                 upcall(parser)
+
+         input_options = parser.add_argument_group('external data input options')
+         input_options.add_argument(
+             'datafiles',
+             nargs=self.ext_input_nargs,
+             metavar='DATAFILE',
+             help='''Data file(s) to analyze, in either text or NumPy (.npy or .npz) format.
+             Uncompressed NumPy files will be memory-mapped, allowing analysis of data larger than
+             available RAM (though not larger than the available address space).''',
+         )
+         input_options.add_argument(
+             '--usecols',
+             dest='usecols',
+             metavar='COLUMNS',
+             type=parse_int_list,
+             help='''Use only the given COLUMNS from the input file(s), e.g. "0", "0,1",
+             "0:5,7,9:10".''',
+         )
+         input_options.add_argument(
+             '--chunksize',
+             dest='chunksize',
+             type=int,
+             default=self.default_chunksize,
+             help='''Process input data in blocks of size CHUNKSIZE. This will only reduce memory
+             requirements when using uncompressed NumPy (.npy) format input. (Default: %(default)d.)''',
+         )
+
+     def process_args(self, args, upcall=True):
+         if args.usecols:
+             westpa.rc.pstatus('Using only the following columns from external input: {!s}'.format(args.usecols))
+             self.ext_input_usecols = args.usecols
+         else:
+             self.ext_input_usecols = None
+
+         self.ext_input_filenames = args.datafiles
+         self.ext_input_chunksize = args.chunksize or self.default_chunksize
+
+         if upcall:
+             try:
+                 upfunc = super().process_args
+             except AttributeError:
+                 pass
+             else:
+                 upfunc(args)
+
+     def is_npy(self, filename):
+         with open(filename, 'rb') as fileobj:
+             first_bytes = fileobj.read(len(np.lib.format.MAGIC_PREFIX))
+
+         return first_bytes == np.lib.format.MAGIC_PREFIX
+
+     def load_npy_or_text(self, filename):
+         '''Load an array from an existing .npy file, or read a text file and
+         convert it to a NumPy array. In either case, return a NumPy array. If a
+         saved NumPy dataset is found, memory-map it read-only. If the specified
+         file does not contain a saved NumPy array, attempt to read the file using
+         numpy.loadtxt(filename).'''
+
+         if self.is_npy(filename):
+             return np.load(filename, mmap_mode='r')
+         else:
+             return np.loadtxt(filename)
+
+     def text_to_h5dataset(self, fileobj, group, dsname, dtype=np.float64, skiprows=0, usecols=None, chunksize=None):
+         '''Read text-format data from the given filename or file-like object ``fileobj`` and write it to a newly
+         created dataset called ``dsname`` in the HDF5 group ``group``. The data is stored as type ``dtype``. By default,
+         the shape is taken as (number of lines, number of columns); columns can be omitted by specifying a list
+         for ``usecols``, and lines can be skipped by using ``skiprows``. Data is read in chunks of ``chunksize`` rows.'''
+
+         try:
+             fileobj.readline
+         except AttributeError:
+             fileobj = open(fileobj, 'rt')
+
+         usecols = usecols or self.ext_input_usecols
+         chunksize = chunksize or self.ext_input_chunksize
+
+         linenumber = 0
+         for iskip in range(skiprows or 0):
+             fileobj.readline()
+             linenumber += 1
+
+         nrows = 0
+         irow = 0
+         ncols_input = None  # number of columns in the input
+         ncols_store = None  # number of columns to store
+         databuffer = None
+         dataset = None
+
+         re_split_comments = self.ext_input_comment_regexp
+         re_split_fields = self.ext_input_sep_regexp
+
+         for line in fileobj:
+             linenumber += 1
+
+             # Discard comments and extraneous whitespace
+             if re_split_comments is not None:
+                 record_text = re_split_comments.split(line, 1)[0].strip()
+             else:
+                 record_text = line.split('#', 1)[0].strip()
+
+             if not record_text:
+                 continue
+
+             if re_split_fields is not None:
+                 fields = re_split_fields.split(record_text)
+             else:
+                 fields = record_text.split()
+
+             # Check that the input width hasn't changed (blank lines excluded)
+             if not ncols_input:
+                 ncols_input = len(fields)
+             elif len(fields) != ncols_input:
+                 raise ValueError('expected {:d} columns at line {:d}, but found {:d}'.format(ncols_input, linenumber, len(fields)))
+
+             # On the first data line, allocate temporary storage and create the output dataset
+             if not ncols_store:
+                 ncols_store = len(usecols) if usecols else ncols_input
+                 databuffer = np.empty((chunksize, ncols_store), dtype)
+                 dataset = group.create_dataset(
+                     dsname, shape=(0, ncols_store), maxshape=(None, ncols_store), chunks=(chunksize, ncols_store), dtype=dtype
+                 )
+
+             if usecols:
+                 for ifield, iifield in enumerate(usecols):
+                     databuffer[irow, ifield] = dtype(fields[iifield])
+             else:
+                 for ifield, field in enumerate(fields):
+                     databuffer[irow, ifield] = dtype(field)
+
+             nrows += 1
+             irow += 1
+
+             # Flush the buffer to HDF5 when it fills
+             if irow == chunksize:
+                 westpa.rc.pstatus('\r Read {:d} rows'.format(nrows), end='')
+                 westpa.rc.pflush()
+                 dataset.resize((nrows, ncols_store))
+                 dataset[-irow:] = databuffer
+                 irow = 0
+
+         # Flush any remaining rows
+         if irow > 0:
+             dataset.resize((nrows, ncols_store))
+             dataset[-irow:] = databuffer[:irow]
+             westpa.rc.pstatus('\r Read {:d} rows'.format(nrows))
+             westpa.rc.pflush()
+
+     def npy_to_h5dataset(self, array, group, dsname, usecols=None, chunksize=None):
+         '''Store the given array in a newly created dataset named ``dsname`` in the HDF5 group
+         ``group``, optionally storing only a subset of columns. Data is written ``chunksize`` rows at a time,
+         allowing very large memory-mapped arrays to be copied.'''
+
+         usecols = usecols or self.ext_input_usecols
+         chunksize = chunksize or self.ext_input_chunksize
+
+         if usecols:
+             shape = (len(array), len(usecols))
+         else:
+             shape = array.shape
+
+         if len(shape) == 1:
+             shape = shape + (1,)
+         maxlen = len(array)
+         mw = len(str(maxlen))
+         dataset = group.create_dataset(dsname, shape=shape, dtype=array.dtype)
+
+         if usecols:
+             for istart in range(0, maxlen, chunksize):
+                 iend = min(istart + chunksize, maxlen)
+                 dataset[istart:iend] = array[istart:iend, usecols]
+                 westpa.rc.pstatus('\r Read {:{mw}d}/{:>{mw}d} rows'.format(iend, maxlen, mw=mw), end='')
+                 westpa.rc.pflush()
+         else:
+             for istart in range(0, maxlen, chunksize):
+                 iend = min(istart + chunksize, maxlen)
+                 dataset[istart:iend] = array[istart:iend]
+                 westpa.rc.pstatus('\r Read {:{mw}d}/{:>{mw}d} rows'.format(iend, maxlen, mw=mw), end='')
+                 westpa.rc.pflush()
+         westpa.rc.pstatus()
+
+
+ class BFDataManager(AnalysisMixin):
+     '''A class to manage brute force trajectory data. The primary purpose is to read in and
+     manage brute force progress coordinate data for one or more trajectories. The trajectories need not
+     be the same length, but they do need to have the same time spacing for progress coordinate values.'''
+
+     traj_index_dtype = np.dtype([('pcoord_len', np.uint64), ('source_data', h5py.special_dtype(vlen=str))])
+
+     def __init__(self):
+         super().__init__()
+         self.bf_h5name = None
+         self.bf_h5file = None
+
+     def add_args(self, parser, upcall=True):
+         if upcall:
+             try:
+                 upcall = super().add_args
+             except AttributeError:
+                 pass
+             else:
+                 upcall(parser)
+
+         group = parser.add_argument_group('brute force input data options')
+         group.add_argument(
+             '-B',
+             '--bfdata',
+             '--brute-force-data',
+             dest='bf_h5name',
+             metavar='BF_H5FILE',
+             default='bf_system.h5',
+             help='''Brute force data is/will be stored in BF_H5FILE (default: %(default)s).''',
+         )
+
+     def process_args(self, args, upcall=True):
+         self.bf_h5name = args.bf_h5name
+         westpa.rc.pstatus("Using brute force data from '{}'".format(self.bf_h5name))
+
+         if upcall:
+             try:
+                 upfunc = super().process_args
+             except AttributeError:
+                 pass
+             else:
+                 upfunc(args)
+
+     def _get_traj_group_name(self, traj_id):
+         return 'traj_{:09d}'.format(traj_id)
+
+     def update_traj_index(self, traj_id, pcoord_len, source_data):
+         self.bf_h5file['traj_index'][traj_id] = (pcoord_len, source_data)
+
+     def get_traj_group(self, traj_id):
+         return self.bf_h5file[self._get_traj_group_name(traj_id)]
+
+     def create_traj_group(self):
+         new_traj_id = self.get_n_trajs()
+         group = self.bf_h5file.create_group(self._get_traj_group_name(new_traj_id))
+         self.bf_h5file['traj_index'].resize((new_traj_id + 1,))
+         return (new_traj_id, group)
+
+     def get_n_trajs(self):
+         return self.bf_h5file['traj_index'].shape[0]
+
+     def get_traj_len(self, traj_id):
+         return self.bf_h5file['traj_index'][traj_id]['pcoord_len']
+
+     def get_max_traj_len(self):
+         return self.bf_h5file['traj_index']['pcoord_len'].max()
+
+     def get_pcoord_array(self, traj_id):
+         return self.get_traj_group(traj_id)['pcoord'][...]
+
+     def get_pcoord_dataset(self, traj_id):
+         return self.get_traj_group(traj_id)['pcoord']
+
+     def require_bf_h5file(self):
+         if self.bf_h5file is None:
+             assert self.bf_h5name
+             # Open read/write, creating the file if necessary; recent h5py requires an explicit mode
+             self.bf_h5file = h5py.File(self.bf_h5name, 'a')
+             try:
+                 self.bf_h5file['traj_index']
+             except KeyError:
+                 # A new file; create the trajectory index
+                 self.bf_h5file.create_dataset('traj_index', shape=(0,), maxshape=(None,), dtype=self.traj_index_dtype)
+         return self.bf_h5file
+
+     def close_bf_h5file(self):
+         if self.bf_h5file is not None:
+             self.bf_h5file.close()
+             self.bf_h5file = None
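
A minimal usage sketch (not part of the wheel) tying the pieces above together: it reads, directly with h5py, the layout that WESTDataReaderMixin caches. The west.h5 filename and the /iterations/iter_{:08d} group naming are assumptions based on WESTPA defaults (the mixin resolves groups through the data manager rather than hardcoding paths); the /summary table and the seg_index, wtgraph, and pcoord members are the objects read above.

    import h5py
    import numpy as np

    with h5py.File('west.h5', 'r') as west:           # assumed default filename
        summary = west['/summary'][...]               # one row per iteration
        for n_iter in range(1, len(summary) + 1):
            # Group path assumed from WESTPA defaults
            iter_group = west['/iterations/iter_{:08d}'.format(n_iter)]
            seg_index = iter_group['seg_index'][...]  # cached wholesale by get_seg_index()
            pcoord_ds = iter_group['pcoord']          # (n_segs, pcoord_len, pcoord_ndim)
            # Newly created segments (not continuations) have parent_id < 0,
            # the same test get_created_seg_ids() applies
            created = np.flatnonzero(seg_index['parent_id'] < 0)
            print('iter {:d}: {:d} segments, {:d} created, pcoord len {:d}'.format(
                n_iter, len(seg_index), len(created), pcoord_ds.shape[1]))

The chunked-append pattern used by text_to_h5dataset() is also worth seeing in isolation; a sketch with stand-in data in place of parsed text rows, writing to a hypothetical scratch file:

    import h5py
    import numpy as np

    chunksize, ncols = 4, 3
    with h5py.File('scratch.h5', 'w') as f:
        # Extendable dataset: starts empty, grows chunksize rows at a time
        dset = f.create_dataset('data', shape=(0, ncols), maxshape=(None, ncols),
                                chunks=(chunksize, ncols), dtype=np.float64)
        buf = np.empty((chunksize, ncols))
        nrows = irow = 0
        for row in np.arange(30.0).reshape(10, 3):    # stand-in for parsed rows
            buf[irow] = row
            nrows += 1
            irow += 1
            if irow == chunksize:                     # flush a full buffer
                dset.resize((nrows, ncols))
                dset[-irow:] = buf
                irow = 0
        if irow:                                      # flush the remainder
            dset.resize((nrows, ncols))
            dset[-irow:] = buf[:irow]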