westpa 2022.12__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of westpa might be problematic.

Files changed (149)
  1. westpa/__init__.py +14 -0
  2. westpa/_version.py +21 -0
  3. westpa/analysis/__init__.py +5 -0
  4. westpa/analysis/core.py +746 -0
  5. westpa/analysis/statistics.py +27 -0
  6. westpa/analysis/trajectories.py +360 -0
  7. westpa/cli/__init__.py +0 -0
  8. westpa/cli/core/__init__.py +0 -0
  9. westpa/cli/core/w_fork.py +152 -0
  10. westpa/cli/core/w_init.py +230 -0
  11. westpa/cli/core/w_run.py +77 -0
  12. westpa/cli/core/w_states.py +212 -0
  13. westpa/cli/core/w_succ.py +99 -0
  14. westpa/cli/core/w_truncate.py +68 -0
  15. westpa/cli/tools/__init__.py +0 -0
  16. westpa/cli/tools/ploterr.py +506 -0
  17. westpa/cli/tools/plothist.py +706 -0
  18. westpa/cli/tools/w_assign.py +596 -0
  19. westpa/cli/tools/w_bins.py +166 -0
  20. westpa/cli/tools/w_crawl.py +119 -0
  21. westpa/cli/tools/w_direct.py +547 -0
  22. westpa/cli/tools/w_dumpsegs.py +94 -0
  23. westpa/cli/tools/w_eddist.py +506 -0
  24. westpa/cli/tools/w_fluxanl.py +376 -0
  25. westpa/cli/tools/w_ipa.py +833 -0
  26. westpa/cli/tools/w_kinavg.py +127 -0
  27. westpa/cli/tools/w_kinetics.py +96 -0
  28. westpa/cli/tools/w_multi_west.py +414 -0
  29. westpa/cli/tools/w_ntop.py +213 -0
  30. westpa/cli/tools/w_pdist.py +515 -0
  31. westpa/cli/tools/w_postanalysis_matrix.py +82 -0
  32. westpa/cli/tools/w_postanalysis_reweight.py +53 -0
  33. westpa/cli/tools/w_red.py +491 -0
  34. westpa/cli/tools/w_reweight.py +780 -0
  35. westpa/cli/tools/w_select.py +226 -0
  36. westpa/cli/tools/w_stateprobs.py +111 -0
  37. westpa/cli/tools/w_trace.py +599 -0
  38. westpa/core/__init__.py +0 -0
  39. westpa/core/_rc.py +673 -0
  40. westpa/core/binning/__init__.py +55 -0
  41. westpa/core/binning/_assign.cpython-313-darwin.so +0 -0
  42. westpa/core/binning/assign.py +455 -0
  43. westpa/core/binning/binless.py +96 -0
  44. westpa/core/binning/binless_driver.py +54 -0
  45. westpa/core/binning/binless_manager.py +190 -0
  46. westpa/core/binning/bins.py +47 -0
  47. westpa/core/binning/mab.py +506 -0
  48. westpa/core/binning/mab_driver.py +54 -0
  49. westpa/core/binning/mab_manager.py +198 -0
  50. westpa/core/data_manager.py +1694 -0
  51. westpa/core/extloader.py +74 -0
  52. westpa/core/h5io.py +995 -0
  53. westpa/core/kinetics/__init__.py +24 -0
  54. westpa/core/kinetics/_kinetics.cpython-313-darwin.so +0 -0
  55. westpa/core/kinetics/events.py +147 -0
  56. westpa/core/kinetics/matrates.py +156 -0
  57. westpa/core/kinetics/rate_averaging.py +266 -0
  58. westpa/core/progress.py +218 -0
  59. westpa/core/propagators/__init__.py +54 -0
  60. westpa/core/propagators/executable.py +719 -0
  61. westpa/core/reweight/__init__.py +14 -0
  62. westpa/core/reweight/_reweight.cpython-313-darwin.so +0 -0
  63. westpa/core/reweight/matrix.py +126 -0
  64. westpa/core/segment.py +119 -0
  65. westpa/core/sim_manager.py +835 -0
  66. westpa/core/states.py +359 -0
  67. westpa/core/systems.py +93 -0
  68. westpa/core/textio.py +74 -0
  69. westpa/core/trajectory.py +330 -0
  70. westpa/core/we_driver.py +910 -0
  71. westpa/core/wm_ops.py +43 -0
  72. westpa/core/yamlcfg.py +391 -0
  73. westpa/fasthist/__init__.py +34 -0
  74. westpa/fasthist/_fasthist.cpython-313-darwin.so +0 -0
  75. westpa/mclib/__init__.py +271 -0
  76. westpa/mclib/__main__.py +28 -0
  77. westpa/mclib/_mclib.cpython-313-darwin.so +0 -0
  78. westpa/oldtools/__init__.py +4 -0
  79. westpa/oldtools/aframe/__init__.py +35 -0
  80. westpa/oldtools/aframe/atool.py +75 -0
  81. westpa/oldtools/aframe/base_mixin.py +26 -0
  82. westpa/oldtools/aframe/binning.py +178 -0
  83. westpa/oldtools/aframe/data_reader.py +560 -0
  84. westpa/oldtools/aframe/iter_range.py +200 -0
  85. westpa/oldtools/aframe/kinetics.py +117 -0
  86. westpa/oldtools/aframe/mcbs.py +153 -0
  87. westpa/oldtools/aframe/output.py +39 -0
  88. westpa/oldtools/aframe/plotting.py +90 -0
  89. westpa/oldtools/aframe/trajwalker.py +126 -0
  90. westpa/oldtools/aframe/transitions.py +469 -0
  91. westpa/oldtools/cmds/__init__.py +0 -0
  92. westpa/oldtools/cmds/w_ttimes.py +361 -0
  93. westpa/oldtools/files.py +34 -0
  94. westpa/oldtools/miscfn.py +23 -0
  95. westpa/oldtools/stats/__init__.py +4 -0
  96. westpa/oldtools/stats/accumulator.py +35 -0
  97. westpa/oldtools/stats/edfs.py +129 -0
  98. westpa/oldtools/stats/mcbs.py +96 -0
  99. westpa/tools/__init__.py +33 -0
  100. westpa/tools/binning.py +472 -0
  101. westpa/tools/core.py +340 -0
  102. westpa/tools/data_reader.py +159 -0
  103. westpa/tools/dtypes.py +31 -0
  104. westpa/tools/iter_range.py +198 -0
  105. westpa/tools/kinetics_tool.py +340 -0
  106. westpa/tools/plot.py +283 -0
  107. westpa/tools/progress.py +17 -0
  108. westpa/tools/selected_segs.py +154 -0
  109. westpa/tools/wipi.py +751 -0
  110. westpa/trajtree/__init__.py +4 -0
  111. westpa/trajtree/_trajtree.cpython-313-darwin.so +0 -0
  112. westpa/trajtree/trajtree.py +117 -0
  113. westpa/westext/__init__.py +0 -0
  114. westpa/westext/adaptvoronoi/__init__.py +3 -0
  115. westpa/westext/adaptvoronoi/adaptVor_driver.py +214 -0
  116. westpa/westext/hamsm_restarting/__init__.py +3 -0
  117. westpa/westext/hamsm_restarting/example_overrides.py +35 -0
  118. westpa/westext/hamsm_restarting/restart_driver.py +1165 -0
  119. westpa/westext/stringmethod/__init__.py +11 -0
  120. westpa/westext/stringmethod/fourier_fitting.py +69 -0
  121. westpa/westext/stringmethod/string_driver.py +253 -0
  122. westpa/westext/stringmethod/string_method.py +306 -0
  123. westpa/westext/weed/BinCluster.py +180 -0
  124. westpa/westext/weed/ProbAdjustEquil.py +100 -0
  125. westpa/westext/weed/UncertMath.py +247 -0
  126. westpa/westext/weed/__init__.py +10 -0
  127. westpa/westext/weed/weed_driver.py +192 -0
  128. westpa/westext/wess/ProbAdjust.py +101 -0
  129. westpa/westext/wess/__init__.py +6 -0
  130. westpa/westext/wess/wess_driver.py +217 -0
  131. westpa/work_managers/__init__.py +57 -0
  132. westpa/work_managers/core.py +396 -0
  133. westpa/work_managers/environment.py +134 -0
  134. westpa/work_managers/mpi.py +318 -0
  135. westpa/work_managers/processes.py +187 -0
  136. westpa/work_managers/serial.py +28 -0
  137. westpa/work_managers/threads.py +79 -0
  138. westpa/work_managers/zeromq/__init__.py +20 -0
  139. westpa/work_managers/zeromq/core.py +641 -0
  140. westpa/work_managers/zeromq/node.py +131 -0
  141. westpa/work_managers/zeromq/work_manager.py +526 -0
  142. westpa/work_managers/zeromq/worker.py +320 -0
  143. westpa-2022.12.dist-info/AUTHORS +22 -0
  144. westpa-2022.12.dist-info/LICENSE +21 -0
  145. westpa-2022.12.dist-info/METADATA +193 -0
  146. westpa-2022.12.dist-info/RECORD +149 -0
  147. westpa-2022.12.dist-info/WHEEL +6 -0
  148. westpa-2022.12.dist-info/entry_points.txt +29 -0
  149. westpa-2022.12.dist-info/top_level.txt +1 -0
westpa/cli/tools/w_trace.py
@@ -0,0 +1,599 @@
+ import re
+
+ import h5py
+ import numpy as np
+
+ from westpa.tools import WESTTool, WESTDataReader
+ import westpa
+ from westpa.core import h5io
+
+ from westpa.core.segment import Segment
+ from westpa.core.states import InitialState
+ from westpa.core.data_manager import weight_dtype, n_iter_dtype, seg_id_dtype, utime_dtype
+
+
+ class Trace:
+     '''A class representing a trace of a certain trajectory segment back to its origin.'''
+
+     def __init__(self, summary, endpoint_type, basis_state, initial_state, data_manager=None):
+         self.summary = summary
+         self.endpoint_type = endpoint_type
+         self.basis_state = basis_state
+         self.initial_state = initial_state
+         self.data_manager = data_manager or westpa.rc.get_data_manager()
+
+         # A mapping from aux file names to open h5py.File objects, to minimize time
+
+         self._auxfiles = {}
+
+     def __len__(self):
+         try:
+             return len(self.summary)
+         except TypeError:
+             return 0
+
+     def __getitem__(self, sl):
+         return self.summary[sl]
+
+     def __iter__(self):
+         return iter(self.summary)
+
+     @classmethod
+     def from_data_manager(cls, n_iter, seg_id, data_manager=None):
+         '''Construct and return a trajectory trace whose last segment is identified
+         by ``seg_id`` in the iteration number ``n_iter``.'''
+
+         data_manager = data_manager or westpa.rc.get_data_manager()
+
+         # These values are used later on
+         endpoint_type = None
+         pcoord_dtype = None
+         pcoord_pt_shape = None
+
+         seginfo = []
+         parent_id = seg_id
+
+         while n_iter > 0 and parent_id >= 0:
+             seg_id = parent_id
+             iter_group = data_manager.get_iter_group(n_iter)
+             pcoord_ds = iter_group['pcoord']
+             seg_index = iter_group['seg_index']
+             n_segs = pcoord_ds.shape[0]
+             pcoord_len = pcoord_ds.shape[1]
+
+             assert seg_id < n_segs
+
+             indexrow = seg_index[seg_id]
+             final_pcoord = pcoord_ds[seg_id, pcoord_len - 1]
+             weight = indexrow['weight']
+             cputime = indexrow['cputime']
+             walltime = indexrow['walltime']
+
+             try:
+                 parent_id = int(indexrow['parent_id'])
+             except IndexError:
+                 # old HDF5 version
+                 parent_id = int(iter_group['parents'][indexrow['parents_offset']])
+
+             if endpoint_type is None:
+                 endpoint_type = indexrow['endpoint_type']
+                 pcoord_pt_shape = pcoord_ds.shape[2:]
+                 pcoord_dtype = pcoord_ds.dtype
+
+             seginfo.append((n_iter, seg_id, weight, walltime, cputime, final_pcoord))
+
+             del iter_group, pcoord_ds, seg_index
+             n_iter -= 1
+
+         # loop terminates with parent_id set to the identifier of the initial state,
+         # seg_id set to the identifier of the first segment in the trajectory, and
+         # n_iter set to one less than the iteration of the first segment
+         first_iter = n_iter + 1
+         first_seg_id = seg_id
+         first_parent_id = parent_id
+
+         # Initial segment (for fetching initial state)
+         first_segment = Segment(n_iter=first_iter, seg_id=first_seg_id, parent_id=first_parent_id)
+
+         seginfo.reverse()
+
+         summary_dtype = np.dtype(
+             [
+                 ('n_iter', n_iter_dtype),
+                 ('seg_id', seg_id_dtype),
+                 ('weight', weight_dtype),
+                 ('walltime', utime_dtype),
+                 ('cputime', utime_dtype),
+                 ('final_pcoord', pcoord_dtype, pcoord_pt_shape),
+             ]
+         )
+
+         summary = np.array(seginfo, dtype=summary_dtype)
+
+         try:
+             initial_state = data_manager.get_segment_initial_states([first_segment], first_iter)[0]
+         except KeyError:
+             # old HDF5 version
+             assert parent_id < 0
+             istate_pcoord = data_manager.get_iter_group(first_iter)['pcoord'][first_seg_id, 0]
+             istate_id = -(first_parent_id + 1)
+             basis_state = None
+             initial_state = InitialState(istate_id, None, iter_created=0, pcoord=istate_pcoord)
+
+         else:
+             basis_state = data_manager.get_basis_states(first_iter)[initial_state.basis_state_id]
+
+         return cls(summary, endpoint_type, basis_state, initial_state, data_manager)
+
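As a rough illustration of the API above (not part of the packaged file), a trace can be built programmatically once a WESTPA run directory is configured; the iteration and segment numbers below are hypothetical, and it is assumed that westpa.rc has already been pointed at the simulation's west.cfg/west.h5 so that its data manager is usable.

    # Sketch only: assumes westpa.rc is configured and the data manager's
    # HDF5 backing (west.h5) is already open for reading.
    import westpa
    from westpa.cli.tools.w_trace import Trace

    dm = westpa.rc.get_data_manager()      # same accessor the class itself uses
    trace = Trace.from_data_manager(n_iter=100, seg_id=5, data_manager=dm)

    print(len(trace))                      # number of segments from origin to endpoint
    print(trace.summary['n_iter'])         # fields of the structured summary array above
    print(trace[-1]['final_pcoord'])       # final progress coordinate of the endpoint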
+     def get_segment_data_slice(self, datafile, dsname, n_iter, seg_id, slice_=None, index_data=None, iter_prec=None):
+         '''Return the data from the dataset named ``dsname`` within the given ``datafile`` (an open
+         h5py.File object) for the given iteration and segment. By default, it is assumed that the
+         dataset is stored in the iteration group for iteration ``n_iter``, but if ``index_data``
+         is provided, it must be an iterable (preferably a simple array) of (n_iter,seg_id) pairs,
+         and the index in the ``index_data`` iterable of the matching n_iter/seg_id pair is used as
+         the index of the data to retrieve.
+
+         If an optional ``slice_`` is provided, then the given slicing tuple is appended to that
+         used to retrieve the segment-specific data (i.e. it can be used to pluck a subset of the
+         data that would otherwise be returned).
+         '''
+
+         if slice_ is None:
+             slice_ = np.s_[...]
+
+         if index_data is not None:
+             dataset = datafile[dsname]
+
+             for i, (i_n_iter, i_seg_id) in enumerate(index_data):
+                 if (i_n_iter, i_seg_id) == (n_iter, seg_id):
+                     break
+             else:
+                 raise KeyError((n_iter, seg_id))
+
+             itpl = (i,) + slice_
+             return dataset[itpl]
+         else:
+             if not iter_prec:
+                 iter_prec = datafile.attrs.get('west_iter_prec', self.data_manager.default_iter_prec)
+             igname_tail = 'iter_{:0{iter_prec:d}d}'.format(int(n_iter), iter_prec=int(iter_prec))
+             try:
+                 iter_group = datafile['/iterations/' + igname_tail]
+             except KeyError:
+                 iter_group = datafile[igname_tail]
+
+             dataset = iter_group[dsname]
+             itpl = (seg_id,) + slice_
+
+             return dataset[itpl]
+
+     def trace_timepoint_dataset(self, dsname, slice_=None, auxfile=None, index_ds=None):
+         '''Return a trace along this trajectory over a dataset which is layed out as [seg_id][timepoint][...].
+         Overlapping values at segment boundaries are accounted for. Returns (data_trace, weight), where
+         data_trace is a time series of the dataset along this trajectory, and weight is the corresponding
+         trajectory weight at each time point.
+
+         If ``auxfile`` is given, then load the dataset from the given HDF5 file, which must be
+         layed out the same way as the main HDF5 file (e.g. iterations arranged as
+         iterations/iter_*).
+
+         If index_ds is given, instead of reading data per-iteration from iter_* groups, then the
+         given index_ds is used as an index of n_iter,seg_id pairs into ``dsname``. In this case,
+         the target data set need not exist on a per-iteration basis inside iter_* groups.
+
+         If ``slice_`` is given, then *further* slice the data returned from the HDF5 dataset. This can
+         minimize I/O if it is known (and specified) that only a subset of the data along the
+         trajectory is needed.
+         '''
+
+         # Figure out where to look for the dataset
+         if isinstance(auxfile, str):
+             datafile = h5py.File(auxfile, 'r')
+             close_datafile = True
+         elif auxfile is not None:
+             datafile = auxfile
+             close_datafile = False
+         else:
+             datafile = self.data_manager.we_h5file
+             close_datafile = False
+
+         iter_prec = self.data_manager.iter_prec
+         get_data_slice = self.get_segment_data_slice
+
+         # Load the index if we use it
+         if index_ds is not None:
+             if isinstance(index_ds, str):
+                 index_ds = datafile[index_ds]
+             index_data = index_ds[...]
+         else:
+             index_data = None
+
+         # Be sure to retrieve the time series
+         if not slice_:
+             first_sl = np.index_exp[:, ...]
+             other_sl = np.index_exp[1:, ...]
+         else:
+             first_sl = np.index_exp[:] + slice_
+             other_sl = np.index_exp[1:] + slice_
+
+         # Retrieve the first segment's data
+         first_n_iter, first_seg_id = self.summary[0]['n_iter'], self.summary[0]['seg_id']
+         first_iter_data = get_data_slice(datafile, dsname, first_n_iter, first_seg_id, first_sl, index_data, iter_prec)
+
+         n_segs = len(self)
+         n_points_per_seg = len(first_iter_data)
+
+         length = n_points_per_seg + (n_segs - 1) * (n_points_per_seg - 1)
+         tracedata = np.empty((length,) + first_iter_data.shape[1:], dtype=first_iter_data.dtype)
+         traceweight = np.empty((length,), weight_dtype)
+
+         # Store first segment data
+         tracedata[0:n_points_per_seg] = first_iter_data
+         traceweight[0:n_points_per_seg] = self.summary[0]['weight']
+         del first_iter_data
+
+         # Store remainder of data
+
+         for iseg, summary_item in enumerate(self.summary[1:]):
+             n_iter = summary_item['n_iter']
+             seg_id = summary_item['seg_id']
+             weight = summary_item['weight']
+
+             offset = n_points_per_seg + iseg * (n_points_per_seg - 1)
+             length = n_points_per_seg - 1
+             seg_data = get_data_slice(datafile, dsname, n_iter, seg_id, other_sl, index_data, iter_prec)
+
+             tracedata[offset : offset + length] = seg_data
+             traceweight[offset : offset + length] = weight
+             del seg_data
+
+         if close_datafile:
+             datafile.close()
+
+         return tracedata, traceweight
+
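Continuing the sketch above (again, not part of the packaged file), the method can be used to pull a contiguous per-timepoint series; 'pcoord' is stored per iteration in the main WEST HDF5 file, so no auxfile or index_ds is needed, and the extra slice is illustrative.

    # Sketch: continuous pcoord series along the trace built earlier.
    import numpy as np

    data, weights = trace.trace_timepoint_dataset('pcoord')

    # Each segment shares its first timepoint with its parent's last, so every
    # segment after the first contributes (n_points_per_seg - 1) new points:
    #   len(data) == n_points_per_seg + (n_segs - 1) * (n_points_per_seg - 1)
    assert data.shape[0] == weights.shape[0]

    # Reduce I/O by slicing out only the first pcoord dimension; the tuple is
    # appended to the per-segment selection exactly as documented above.
    dim0, w0 = trace.trace_timepoint_dataset('pcoord', slice_=np.index_exp[0])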
+     """
+     # This is disabled until there is a real use for it; the following code is
+     # outdated
+     def trace_perseg_dataset(self, dsname):
+         '''Return a trace along this trajectory over a dataset which is layed out as [seg_id][...].
+         Returns (data_trace, weight), where data_trace is a time series of the dataset along this
+         trajectory, and weight is the corresponding trajectory weight at each time point.'''
+
+         first_n_iter, first_seg_id = self.summary[0]['n_iter'], self.summary[0]['seg_id']
+         first_iter_group = self.data_manager.get_iter_group(first_n_iter)
+         first_iter_ds = first_iter_group[dsname]
+         n_segs = len(self)
+         tracedata = np.empty((n_segs,) + first_iter_ds.shape[1:], dtype=first_iter_ds.dtype)
+         traceweight = np.empty((n_segs,), weight_dtype)
+         tracedata[0] = first_iter_ds[first_seg_id]
+         traceweight[0] = self.summary[0]['weight']
+         for isegm1, summary_item in enumerate(self.summary[1:]):
+             iseg = isegm1 + 1
+             n_iter = summary_item['n_iter']
+             seg_id = summary_item['seg_id']
+             iter_group = self.data_manager.get_iter_group(n_iter)
+             seg_data = iter_group[dsname][seg_id]
+             tracedata[iseg] = seg_data
+             traceweight[iseg] = summary_item['weight']
+             del seg_data
+
+         return tracedata, traceweight
+     """
+
+
+ class WTraceTool(WESTTool):
+     prog = 'w_trace'
+     description = '''\
+ Trace individual WEST trajectories and emit (or calculate) quantities along the
+ trajectory.
+
+ Trajectories are specified as N_ITER:SEG_ID pairs. Each segment is traced back
+ to its initial point, and then various quantities (notably n_iter and seg_id)
+ are printed in order from initial point up until the given segment in the given
+ iteration.
+
+ Output is stored in several files, all named according to the pattern given by
+ the -o/--output-pattern parameter. The default output pattern is "traj_%d_%d",
+ where the printf-style format codes are replaced by the iteration number and
+ segment ID of the terminal segment of the trajectory being traced.
+
+ Individual datasets can be selected for writing using the -d/--dataset option
+ (which may be specified more than once). The simplest form is ``-d dsname``,
+ which causes data from dataset ``dsname`` along the trace to be stored to
+ HDF5. The dataset is assumed to be stored on a per-iteration basis, with
+ the first dimension corresponding to seg_id and the second dimension
+ corresponding to time within the segment. Further options are specified
+ as comma-separated key=value pairs after the data set name, as in
+
+     -d dsname,alias=newname,index=idsname,file=otherfile.h5,slice=[100,...]
+
+ The following options for datasets are supported:
+
+   alias=newname
+     When writing this data to HDF5 or text files, use ``newname``
+     instead of ``dsname`` to identify the dataset. This is mostly of
+     use in conjunction with the ``slice`` option in order, e.g., to
+     retrieve two different slices of a dataset and store then with
+     different names for future use.
+
+   index=idsname
+     The dataset is not stored on a per-iteration basis for all
+     segments, but instead is stored as a single dataset whose
+     first dimension indexes n_iter/seg_id pairs. The index to
+     these n_iter/seg_id pairs is ``idsname``.
+
+   file=otherfile.h5
+     Instead of reading data from the main WEST HDF5 file (usually
+     ``west.h5``), read data from ``otherfile.h5``.
+
+   slice=[100,...]
+     Retrieve only the given slice from the dataset. This can be
+     used to pick a subset of interest to minimize I/O.
+
+ -------------------------------------------------------------------------------
+ '''
+
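For orientation, an invocation consistent with the help text above might look like the following (not taken from the package; the dataset names, file names, and N_ITER:SEG_ID values are purely illustrative):

    w_trace 100:5 120:17 -o trajs.h5 --output-pattern traj_%d_%d \
        -d pcoord \
        -d dihedrals,alias=phi_psi,file=aux.h5,slice=[0,...]

This would trace two trajectories, store the progress coordinate plus a sliced auxiliary dataset for each, and write the results to trajs.h5.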
+     pcoord_formats = {
+         'u8': '%20d',
+         'i8': '%20d',
+         'u4': '%10d',
+         'i4': '%11d',
+         'u2': '%5d',
+         'i2': '%6d',
+         'f4': '%14.7g',
+         'f8': '%023.15g',
+     }
+
+     def __init__(self):
+         super().__init__()
+
+         self.data_reader = WESTDataReader()
+         # self.h5storage = HDF5Storage()
+         self.output_file = None
+         self.output_pattern = None
+         self.endpoints = None
+         self.datasets = []
+
+     # Interface for command-line tools
+     def add_args(self, parser):
+         self.data_reader.add_args(parser)
+         # self.h5storage.add_args(parser)
+         parser.add_argument(
+             '-d',
+             '--dataset',
+             dest='datasets',
+             # this breaks argparse (see http://bugs.python.org/issue11874)
+             # metavar='DSNAME[,alias=ALIAS][,index=INDEX][,file=FILE][,slice=SLICE]',
+             metavar='DSNAME',
+             action='append',
+             help='''Include the dataset named DSNAME in trace output. An extended form like
+                     DSNAME[,alias=ALIAS][,index=INDEX][,file=FILE][,slice=SLICE] will
+                     obtain the dataset from the given FILE instead of the main WEST HDF5 file,
+                     slice it by SLICE, call it ALIAS in output, and/or access per-segment data by a n_iter,seg_id
+                     INDEX instead of a seg_id indexed dataset in the group for n_iter.''',
+         )
+         parser.add_argument(
+             'endpoints',
+             metavar='N_ITER:SEG_ID',
+             nargs='+',
+             help='''Trace trajectory ending (or at least alive at) N_ITER:SEG_ID.''',
+         )
+
+         # tgroup = parser.add_argument_group('trace options')
+         ogroup = parser.add_argument_group('output options')
+         ogroup.add_argument(
+             '--output-pattern',
+             default='traj_%d_%d',
+             help='''Write per-trajectory data to output files/HDF5 groups whose names begin with OUTPUT_PATTERN,
+                     which must contain two printf-style format flags which will be replaced with the iteration number
+                     and segment ID of the terminal segment of the trajectory being traced.
+                     (Default: %(default)s.)''',
+         )
+         ogroup.add_argument(
+             '-o',
+             '--output',
+             default='trajs.h5',
+             help='Store intermediate data and analysis results to OUTPUT (default: %(default)s).',
+         )
+
+     def process_args(self, args):
+         self.data_reader.process_args(args)
+         # self.h5storage.process_args(args)
+         self.endpoints = [list(map(int, endpoint.split(':'))) for endpoint in args.endpoints]
+         self.output_pattern = args.output_pattern
+
+         for dsstr in args.datasets or []:
+             self.datasets.append(self.parse_dataset_string(dsstr))
+
+         # self.h5storage.open_analysis_h5file()
+         self.output_file = h5py.File(args.output, 'a')
+
+     def parse_dataset_string(self, dsstr):
+         dsinfo = {}
+
+         r = re.compile(r',(?=[^\]]*(?:\[|$))')
+         fields = r.split(dsstr)
+
+         dsinfo['dsname'] = fields[0]
+
+         for field in (field.strip() for field in fields[1:]):
+             k, v = field.split('=')
+             k = k.lower()
+             if k in ('alias', 'file', 'index'):
+                 dsinfo[k] = v
+             elif k == 'slice':
+                 try:
+                     dsinfo['slice'] = eval('np.index_exp' + v)
+                 except SyntaxError:
+                     raise SyntaxError('invalid index expression {!r}'.format(v))
+             else:
+                 raise ValueError('invalid dataset option {!r}'.format(k))
+
+         return dsinfo
+
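A small self-contained sketch (not part of the packaged file) of how the comma-splitting regex above behaves: the lookahead only matches commas that sit outside a [...] slice expression, so commas inside the slice survive intact. The dataset and file names here are hypothetical.

    import re
    import numpy as np

    r = re.compile(r',(?=[^\]]*(?:\[|$))')
    print(r.split('dihedrals,alias=phi_psi,file=aux.h5,slice=[0,...]'))
    # -> ['dihedrals', 'alias=phi_psi', 'file=aux.h5', 'slice=[0,...]']

    # The slice text is then evaluated as an index expression, as in the method above:
    print(eval('np.index_exp' + '[0,...]'))   # -> (0, Ellipsis)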
+     def go(self):
+         self.data_reader.open('r')
+
+         # Create a new 'trajectories' group if this is the first trace
+         try:
+             trajs_group = h5io.create_hdf5_group(self.output_file, 'trajectories', replace=False, creating_program=self.prog)
+         except ValueError:
+             trajs_group = self.output_file['trajectories']
+
+         for n_iter, seg_id in self.endpoints:
+             trajname = self.output_pattern % (n_iter, seg_id)
+             trajgroup = trajs_group.create_group(trajname)
+
+             trace = Trace.from_data_manager(n_iter, seg_id, self.data_reader.data_manager)
+
+             with open(trajname + '_trace.txt', 'wt') as trace_output:
+                 self.emit_trace_text(trace, trace_output)
+
+             self.emit_trace_h5(trace, trajgroup)
+
+             aux_h5files = {}
+             for dsinfo in self.datasets:
+                 dsname = dsinfo['dsname']
+                 filename = dsinfo.get('file')
+                 if filename:
+                     try:
+                         aux_h5file = aux_h5files[filename]
+                     except KeyError:
+                         aux_h5file = aux_h5files[filename] = h5py.File(filename, 'r')
+                 else:
+                     aux_h5file = None
+
+                 slice_ = dsinfo.get('slice')
+                 alias = dsinfo.get('alias', dsname)
+                 index = dsinfo.get('index')
+
+                 data, weights = trace.trace_timepoint_dataset(dsname, auxfile=aux_h5file, slice_=slice_, index_ds=index)
+
+                 # Save data to HDF5
+                 try:
+                     del trajgroup[alias]
+                 except KeyError:
+                     pass
+                 trajgroup[alias] = data
+
+                 # All weight vectors will be the same length, so only store in HDF5 once
+                 if not ('weights' in trajgroup and trajgroup['weights'].shape == weights.shape):
+                     try:
+                         del trajgroup['weights']
+                     except KeyError:
+                         pass
+                     trajgroup['weights'] = weights
+
+     def emit_trace_h5(self, trace, output_group):
+         for dsname in ('basis_state', 'initial_state', 'segments'):
+             try:
+                 del output_group[dsname]
+             except KeyError:
+                 pass
+
+         if trace.basis_state:
+             output_group['basis_state'] = trace.basis_state.as_numpy_record()
+         output_group['initial_state'] = trace.initial_state.as_numpy_record()
+         output_group['segments'] = trace.summary
+
+     def emit_trace_text(self, trace, output_file):
+         '''Dump summary information about each segment in the given trace to the given output_file,
+         which must be opened for writing in text mode. Output columns are separated by at least
+         one space.'''
+
+         if not trace:
+             return
+
+         pcoord_ndim = trace[0]['final_pcoord'].shape[0]
+         lastseg = trace[-1]
+         len_n_iter = max(6, len(str(lastseg['n_iter'])))
+         len_seg_id = max(6, max(len(str(seg_id)) for seg_id in trace['seg_id']))
+         seg_pattern = (
+             ' '.join(
+                 [
+                     '{n_iter:{len_n_iter}d}',
+                     '{seg_id:{len_seg_id}d}',
+                     '{weight:22.17e}',
+                     '{walltime:10.6g}',
+                     '{cputime:10.6g}',
+                     '{pcoord_str:s}',
+                 ]
+             )
+             + '\n'
+         )
+
+         output_file.write(
+             '''\
+ # Trace of trajectory ending in n_iter:seg_id {n_iter:d}:{seg_id:d} (endpoint type {endpoint_type_text:s})
+ # column 0: iteration (0 => initial state)
+ # column 1: seg_id (or initial state ID)
+ # column 2: weight
+ # column 3: wallclock time (s)
+ # column 4: CPU time (s)
+ '''.format(
+                 n_iter=int(lastseg['n_iter']),
+                 seg_id=int(lastseg['seg_id']),
+                 endpoint_type_text=Segment.endpoint_type_names[trace.endpoint_type],
+             )
+         )
+
+         if pcoord_ndim == 1:
+             output_file.write(
+                 '''\
+ # column 5: final progress coordinate value
+ '''
+             )
+         else:
+             fpcbegin = 5
+             fpcend = fpcbegin + pcoord_ndim - 1
+             output_file.write(
+                 '''\
+ # columns {fpcbegin:d} -- {fpcend:d}: final progress coordinate value
+ '''.format(
+                     fpcbegin=fpcbegin, fpcend=fpcend
+                 )
+             )
+
+         pcoord_formats = self.pcoord_formats
+
+         # Output row for initial state
+         initial_state = trace.initial_state
+         pcoord_str = ' '.join(pcoord_formats.get(pcfield.dtype.str[1:], '%s') % pcfield for pcfield in initial_state.pcoord)
+         output_file.write(
+             seg_pattern.format(
+                 n_iter=0,
+                 seg_id=initial_state.state_id,
+                 weight=0.0,
+                 walltime=0,
+                 cputime=0,
+                 pcoord_str=pcoord_str,
+                 len_n_iter=len_n_iter,
+                 len_seg_id=len_seg_id,
+             )
+         )
+
+         # Output rows for segments
+         for segment in trace:
+             pcoord_str = ' '.join(
+                 pcoord_formats.get(pcfield.dtype.str[1:], '%s') % pcfield for pcfield in segment['final_pcoord']
+             )
+             output_file.write(
+                 seg_pattern.format(
+                     n_iter=int(segment['n_iter']),
+                     seg_id=int(segment['seg_id']),
+                     weight=float(segment['weight']),
+                     walltime=float(segment['walltime']),
+                     cputime=float(segment['cputime']),
+                     pcoord_str=pcoord_str,
+                     len_n_iter=len_n_iter,
+                     len_seg_id=len_seg_id,
+                 )
+             )
+
+
+ def entry_point():
+     WTraceTool().main()
+
+
+ if __name__ == '__main__':
+     entry_point()
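Finally, a sketch (not from the package) of inspecting the HDF5 output that go() and emit_trace_h5() above produce; it assumes a prior run with default options traced iteration 100, segment 5, which are made-up values.

    import h5py

    with h5py.File('trajs.h5', 'r') as f:
        traj = f['trajectories/traj_100_5']
        segments = traj['segments'][...]        # the Trace.summary structured array
        print(segments['n_iter'], segments['seg_id'], segments['weight'])
        if 'weights' in traj:
            print(traj['weights'][...])         # per-timepoint weights, stored once per trajectory
    # A plain-text summary is also written alongside, e.g. traj_100_5_trace.txt.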