westpa 2022.10__cp312-cp312-macosx_10_9_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of westpa might be problematic. Click here for more details.

Files changed (150) hide show
  1. westpa/__init__.py +14 -0
  2. westpa/_version.py +21 -0
  3. westpa/analysis/__init__.py +5 -0
  4. westpa/analysis/core.py +746 -0
  5. westpa/analysis/statistics.py +27 -0
  6. westpa/analysis/trajectories.py +360 -0
  7. westpa/cli/__init__.py +0 -0
  8. westpa/cli/core/__init__.py +0 -0
  9. westpa/cli/core/w_fork.py +152 -0
  10. westpa/cli/core/w_init.py +230 -0
  11. westpa/cli/core/w_run.py +77 -0
  12. westpa/cli/core/w_states.py +212 -0
  13. westpa/cli/core/w_succ.py +99 -0
  14. westpa/cli/core/w_truncate.py +59 -0
  15. westpa/cli/tools/__init__.py +0 -0
  16. westpa/cli/tools/ploterr.py +506 -0
  17. westpa/cli/tools/plothist.py +706 -0
  18. westpa/cli/tools/w_assign.py +596 -0
  19. westpa/cli/tools/w_bins.py +166 -0
  20. westpa/cli/tools/w_crawl.py +119 -0
  21. westpa/cli/tools/w_direct.py +547 -0
  22. westpa/cli/tools/w_dumpsegs.py +94 -0
  23. westpa/cli/tools/w_eddist.py +506 -0
  24. westpa/cli/tools/w_fluxanl.py +378 -0
  25. westpa/cli/tools/w_ipa.py +833 -0
  26. westpa/cli/tools/w_kinavg.py +127 -0
  27. westpa/cli/tools/w_kinetics.py +96 -0
  28. westpa/cli/tools/w_multi_west.py +414 -0
  29. westpa/cli/tools/w_ntop.py +213 -0
  30. westpa/cli/tools/w_pdist.py +515 -0
  31. westpa/cli/tools/w_postanalysis_matrix.py +82 -0
  32. westpa/cli/tools/w_postanalysis_reweight.py +53 -0
  33. westpa/cli/tools/w_red.py +486 -0
  34. westpa/cli/tools/w_reweight.py +780 -0
  35. westpa/cli/tools/w_select.py +226 -0
  36. westpa/cli/tools/w_stateprobs.py +111 -0
  37. westpa/cli/tools/w_trace.py +599 -0
  38. westpa/core/__init__.py +0 -0
  39. westpa/core/_rc.py +673 -0
  40. westpa/core/binning/__init__.py +55 -0
  41. westpa/core/binning/_assign.cpython-312-darwin.so +0 -0
  42. westpa/core/binning/assign.py +449 -0
  43. westpa/core/binning/binless.py +96 -0
  44. westpa/core/binning/binless_driver.py +54 -0
  45. westpa/core/binning/binless_manager.py +190 -0
  46. westpa/core/binning/bins.py +47 -0
  47. westpa/core/binning/mab.py +427 -0
  48. westpa/core/binning/mab_driver.py +54 -0
  49. westpa/core/binning/mab_manager.py +198 -0
  50. westpa/core/data_manager.py +1694 -0
  51. westpa/core/extloader.py +74 -0
  52. westpa/core/h5io.py +995 -0
  53. westpa/core/kinetics/__init__.py +24 -0
  54. westpa/core/kinetics/_kinetics.cpython-312-darwin.so +0 -0
  55. westpa/core/kinetics/events.py +147 -0
  56. westpa/core/kinetics/matrates.py +156 -0
  57. westpa/core/kinetics/rate_averaging.py +266 -0
  58. westpa/core/progress.py +218 -0
  59. westpa/core/propagators/__init__.py +54 -0
  60. westpa/core/propagators/executable.py +715 -0
  61. westpa/core/reweight/__init__.py +14 -0
  62. westpa/core/reweight/_reweight.cpython-312-darwin.so +0 -0
  63. westpa/core/reweight/matrix.py +126 -0
  64. westpa/core/segment.py +119 -0
  65. westpa/core/sim_manager.py +830 -0
  66. westpa/core/states.py +359 -0
  67. westpa/core/systems.py +93 -0
  68. westpa/core/textio.py +74 -0
  69. westpa/core/trajectory.py +330 -0
  70. westpa/core/we_driver.py +908 -0
  71. westpa/core/wm_ops.py +43 -0
  72. westpa/core/yamlcfg.py +391 -0
  73. westpa/fasthist/__init__.py +34 -0
  74. westpa/fasthist/__main__.py +110 -0
  75. westpa/fasthist/_fasthist.cpython-312-darwin.so +0 -0
  76. westpa/mclib/__init__.py +264 -0
  77. westpa/mclib/__main__.py +28 -0
  78. westpa/mclib/_mclib.cpython-312-darwin.so +0 -0
  79. westpa/oldtools/__init__.py +4 -0
  80. westpa/oldtools/aframe/__init__.py +35 -0
  81. westpa/oldtools/aframe/atool.py +75 -0
  82. westpa/oldtools/aframe/base_mixin.py +26 -0
  83. westpa/oldtools/aframe/binning.py +178 -0
  84. westpa/oldtools/aframe/data_reader.py +560 -0
  85. westpa/oldtools/aframe/iter_range.py +200 -0
  86. westpa/oldtools/aframe/kinetics.py +117 -0
  87. westpa/oldtools/aframe/mcbs.py +146 -0
  88. westpa/oldtools/aframe/output.py +39 -0
  89. westpa/oldtools/aframe/plotting.py +90 -0
  90. westpa/oldtools/aframe/trajwalker.py +126 -0
  91. westpa/oldtools/aframe/transitions.py +469 -0
  92. westpa/oldtools/cmds/__init__.py +0 -0
  93. westpa/oldtools/cmds/w_ttimes.py +358 -0
  94. westpa/oldtools/files.py +34 -0
  95. westpa/oldtools/miscfn.py +23 -0
  96. westpa/oldtools/stats/__init__.py +4 -0
  97. westpa/oldtools/stats/accumulator.py +35 -0
  98. westpa/oldtools/stats/edfs.py +129 -0
  99. westpa/oldtools/stats/mcbs.py +89 -0
  100. westpa/tools/__init__.py +33 -0
  101. westpa/tools/binning.py +472 -0
  102. westpa/tools/core.py +340 -0
  103. westpa/tools/data_reader.py +159 -0
  104. westpa/tools/dtypes.py +31 -0
  105. westpa/tools/iter_range.py +198 -0
  106. westpa/tools/kinetics_tool.py +340 -0
  107. westpa/tools/plot.py +283 -0
  108. westpa/tools/progress.py +17 -0
  109. westpa/tools/selected_segs.py +154 -0
  110. westpa/tools/wipi.py +751 -0
  111. westpa/trajtree/__init__.py +4 -0
  112. westpa/trajtree/_trajtree.cpython-312-darwin.so +0 -0
  113. westpa/trajtree/trajtree.py +117 -0
  114. westpa/westext/__init__.py +0 -0
  115. westpa/westext/adaptvoronoi/__init__.py +3 -0
  116. westpa/westext/adaptvoronoi/adaptVor_driver.py +214 -0
  117. westpa/westext/hamsm_restarting/__init__.py +3 -0
  118. westpa/westext/hamsm_restarting/example_overrides.py +35 -0
  119. westpa/westext/hamsm_restarting/restart_driver.py +1165 -0
  120. westpa/westext/stringmethod/__init__.py +11 -0
  121. westpa/westext/stringmethod/fourier_fitting.py +69 -0
  122. westpa/westext/stringmethod/string_driver.py +253 -0
  123. westpa/westext/stringmethod/string_method.py +306 -0
  124. westpa/westext/weed/BinCluster.py +180 -0
  125. westpa/westext/weed/ProbAdjustEquil.py +100 -0
  126. westpa/westext/weed/UncertMath.py +247 -0
  127. westpa/westext/weed/__init__.py +10 -0
  128. westpa/westext/weed/weed_driver.py +182 -0
  129. westpa/westext/wess/ProbAdjust.py +101 -0
  130. westpa/westext/wess/__init__.py +6 -0
  131. westpa/westext/wess/wess_driver.py +207 -0
  132. westpa/work_managers/__init__.py +57 -0
  133. westpa/work_managers/core.py +396 -0
  134. westpa/work_managers/environment.py +134 -0
  135. westpa/work_managers/mpi.py +318 -0
  136. westpa/work_managers/processes.py +187 -0
  137. westpa/work_managers/serial.py +28 -0
  138. westpa/work_managers/threads.py +79 -0
  139. westpa/work_managers/zeromq/__init__.py +20 -0
  140. westpa/work_managers/zeromq/core.py +641 -0
  141. westpa/work_managers/zeromq/node.py +131 -0
  142. westpa/work_managers/zeromq/work_manager.py +526 -0
  143. westpa/work_managers/zeromq/worker.py +320 -0
  144. westpa-2022.10.dist-info/AUTHORS +22 -0
  145. westpa-2022.10.dist-info/LICENSE +21 -0
  146. westpa-2022.10.dist-info/METADATA +183 -0
  147. westpa-2022.10.dist-info/RECORD +150 -0
  148. westpa-2022.10.dist-info/WHEEL +5 -0
  149. westpa-2022.10.dist-info/entry_points.txt +29 -0
  150. westpa-2022.10.dist-info/top_level.txt +1 -0
@@ -0,0 +1,596 @@
1
+ import logging
2
+ import math
3
+ import os
4
+
5
+ import numpy as np
6
+ from numpy import index_exp
7
+
8
+ from westpa.core.data_manager import seg_id_dtype, weight_dtype
9
+ from westpa.core.binning import index_dtype, assign_and_label, accumulate_labeled_populations
10
+ from westpa.tools import WESTParallelTool, WESTDataReader, WESTDSSynthesizer, BinMappingComponent, ProgressIndicatorComponent
11
+ import westpa
12
+ from westpa.core import h5io
13
+ from westpa.core.h5io import WESTPAH5File
14
+ from westpa.core.extloader import get_object
15
+
16
+ log = logging.getLogger('w_assign')
17
+
18
+
19
+ # Changes to keep it alive...
20
def parse_pcoord_value(pc_str):
    '''Evaluate a progress-coordinate expression and return it as a 2-D array.

    ``pc_str`` is evaluated as a Python expression with the names ``math``,
    ``numpy``, ``np`` and ``inf`` available. A scalar result is returned with
    shape ``(1, 1)``; a 1-D result of length N is returned with shape
    ``(1, N)``.

    Raises:
        ValueError: if the evaluated value has more than one dimension.
    '''
    # SECURITY NOTE(review): eval() runs arbitrary Python. The text comes from
    # the command line (--states); do not feed this function untrusted input.
    namespace = {'math': math, 'numpy': np, 'np': np, 'inf': float('inf')}

    arr = np.array(eval(pc_str, namespace))
    # reshape() is used instead of assigning to ``arr.shape``, which NumPy
    # documents as discouraged.
    if arr.ndim == 0:
        return arr.reshape((1, 1))
    if arr.ndim == 1:
        return arr.reshape((1,) + arr.shape)
    raise ValueError('too many dimensions')
31
+
32
+
33
def _assign_label_pop(
    n_iter, lb, ub, mapper, nstates, state_map, last_labels, parent_id_dsspec, weight_dsspec, pcoord_dsspec, subsample
):
    '''Assign and label the segments ``lb:ub`` of iteration ``n_iter``.

    Reads parent IDs, weights and progress coordinates for the segment slice
    from the three dataset specifications, passes them to
    ``assign_and_label`` together with ``mapper.assign``, and accumulates the
    weights into a ``(nstates + 1, len(state_map))`` population array.

    Returns:
        ``(assignments, trajlabels, pops, lb, ub, statelabels)``; ``lb`` and
        ``ub`` are echoed back unchanged.
    '''
    seg_slice = index_exp[lb:ub]
    parent_ids = parent_id_dsspec.get_iter_data(n_iter, seg_slice)
    weights = weight_dsspec.get_iter_data(n_iter, seg_slice)
    pcoords = pcoord_dsspec.get_iter_data(n_iter, seg_slice)

    labeled = assign_and_label(lb, ub, parent_ids, mapper.assign, nstates, state_map, last_labels, pcoords, subsample)
    assignments, trajlabels, statelabels = labeled

    # state_map has one entry per bin plus one extra, i.e. nbins + 1 entries.
    pops = np.zeros((nstates + 1, len(state_map)), weight_dtype)
    accumulate_labeled_populations(weights, assignments, trajlabels, pops)
    return (assignments, trajlabels, pops, lb, ub, statelabels)
47
+
48
+
49
+ class WAssign(WESTParallelTool):
50
+ prog = 'w_assign'
51
+ description = '''\
52
+ Assign walkers to bins, producing a file (by default named "assign.h5")
53
+ which can be used in subsequent analysis.
54
+
55
+ For consistency in subsequent analysis operations, the entire dataset
56
+ must be assigned, even if only a subset of the data will be used. This
57
+ ensures that analyses that rely on tracing trajectories always know the
58
+ originating bin of each trajectory.
59
+
60
+
61
+ -----------------------------------------------------------------------------
62
+ Source data
63
+ -----------------------------------------------------------------------------
64
+
65
+ Source data is provided either by a user-specified function
66
+ (--construct-dataset) or a list of "data set specifications" (--dsspecs).
67
+ If neither is provided, the progress coordinate dataset ''pcoord'' is used.
68
+
69
+ To use a custom function to extract or calculate data whose probability
70
+ distribution will be calculated, specify the function in standard Python
71
+ MODULE.FUNCTION syntax as the argument to --construct-dataset. This function
72
+ will be called as function(n_iter,iter_group), where n_iter is the iteration
73
+ whose data are being considered and iter_group is the corresponding group
74
+ in the main WEST HDF5 file (west.h5). The function must return data which can
75
+ be indexed as [segment][timepoint][dimension].
76
+
77
+ To use a list of data set specifications, specify --dsspecs and then list the
78
+ desired datasets one-by-one (space-separated in most shells). These data set
79
+ specifications are formatted as NAME[,file=FILENAME,slice=SLICE], which will
80
+ use the dataset called NAME in the HDF5 file FILENAME (defaulting to the main
81
+ WEST HDF5 file west.h5), and slice it with the Python slice expression SLICE
82
+ (as in [0:2] to select the first two elements of the first axis of the
83
+ dataset). The ``slice`` option is most useful for selecting one column (or
84
+ more) from a multi-column dataset, such as arises when using a progress
85
+ coordinate of multiple dimensions.
86
+
87
+
88
+ -----------------------------------------------------------------------------
89
+ Specifying macrostates
90
+ -----------------------------------------------------------------------------
91
+
92
+ Optionally, kinetic macrostates may be defined in terms of sets of bins.
93
+ Each trajectory will be labeled with the kinetic macrostate it was most
94
+ recently in at each timepoint, for use in subsequent kinetic analysis.
95
+ This is required for all kinetics analysis (w_kintrace and w_kinmat).
96
+
97
+ There are three ways to specify macrostates:
98
+
99
+ 1. States corresponding to single bins may be identified on the command
100
+ line using the --states option, which takes multiple arguments, one for
101
+ each state (separated by spaces in most shells). Each state is specified
102
+ as a coordinate tuple, with an optional label prepended, as in
103
+ ``bound:1.0`` or ``unbound:(2.5,2.5)``. Unlabeled states are named
104
+ ``stateN``, where N is the (zero-based) position in the list of states
105
+ supplied to --states.
106
+
107
+ 2. States corresponding to multiple bins may use a YAML input file specified
108
+ with --states-from-file. This file defines a list of states, each with a
109
+ name and a list of coordinate tuples; bins containing these coordinates
110
+ will be mapped to the containing state. For instance, the following
111
+ file::
112
+
113
+ ---
114
+ states:
115
+ - label: unbound
116
+ coords:
117
+ - [9.0, 1.0]
118
+ - [9.0, 2.0]
119
+ - label: bound
120
+ coords:
121
+ - [0.1, 0.0]
122
+
123
+ produces two macrostates: the first state is called "unbound" and
124
+ consists of bins containing the (2-dimensional) progress coordinate
125
+ values (9.0, 1.0) and (9.0, 2.0); the second state is called "bound"
126
+ and consists of the single bin containing the point (0.1, 0.0).
127
+
128
+ 3. Arbitrary state definitions may be supplied by a user-defined function,
129
+ specified as --states-from-function=MODULE.FUNCTION. This function is
130
+ called with the bin mapper as an argument (``function(mapper)``) and must
131
+ return a list of dictionaries, one per state. Each dictionary must contain
132
+ a vector of coordinate tuples with key "coords"; the bins into which each
133
+ of these tuples falls define the state. An optional name for the state
134
+ (with key "label") may also be provided.
135
+
136
+
137
+ -----------------------------------------------------------------------------
138
+ Output format
139
+ -----------------------------------------------------------------------------
140
+
141
+ The output file (-o/--output, by default "assign.h5") contains the following
142
+ attributes datasets:
143
+
144
+ ``nbins`` attribute
145
+ *(Integer)* Number of valid bins. Bin assignments range from 0 to
146
+ *nbins*-1, inclusive.
147
+
148
+ ``nstates`` attribute
149
+ *(Integer)* Number of valid macrostates (may be zero if no such states are
150
+ specified). Trajectory ensemble assignments range from 0 to *nstates*-1,
151
+ inclusive, when states are defined.
152
+
153
+ ``/assignments`` [iteration][segment][timepoint]
154
+ *(Integer)* Per-segment and -timepoint assignments (bin indices).
155
+
156
+ ``/npts`` [iteration]
157
+ *(Integer)* Number of timepoints in each iteration.
158
+
159
+ ``/nsegs`` [iteration]
160
+ *(Integer)* Number of segments in each iteration.
161
+
162
+ ``/labeled_populations`` [iterations][state][bin]
163
+ *(Floating-point)* Per-iteration and -timepoint bin populations, labeled
164
+ by most recently visited macrostate. The last state entry (*nstates-1*)
165
+ corresponds to trajectories initiated outside of a defined macrostate.
166
+
167
+ ``/bin_labels`` [bin]
168
+ *(String)* Text labels of bins.
169
+
170
+ When macrostate assignments are given, the following additional datasets are
171
+ present:
172
+
173
+ ``/trajlabels`` [iteration][segment][timepoint]
174
+ *(Integer)* Per-segment and -timepoint trajectory labels, indicating the
175
+ macrostate which each trajectory last visited.
176
+
177
+ ``/state_labels`` [state]
178
+ *(String)* Labels of states.
179
+
180
+ ``/state_map`` [bin]
181
+ *(Integer)* Mapping of bin index to the macrostate containing that bin.
182
+ An entry will contain *nbins+1* if that bin does not fall into a
183
+ macrostate.
184
+
185
+ Datasets indexed by state and bin contain one more entry than the number of
186
+ valid states or bins. For *N* bins, axes indexed by bin are of size *N+1*, and
187
+ entry *N* (0-based indexing) corresponds to a walker outside of the defined bin
188
+ space (which will cause most mappers to raise an error). More importantly, for
189
+ *M* states (including the case *M=0* where no states are specified), axes
190
+ indexed by state are of size *M+1* and entry *M* refers to trajectories
191
+ initiated in a region not corresponding to a defined macrostate.
192
+
193
+ Thus, ``labeled_populations[:,:,:].sum(axis=1)[:,:-1]`` gives overall per-bin
194
+ populations, for all defined bins and
195
+ ``labeled_populations[:,:,:].sum(axis=2)[:,:-1]`` gives overall
196
+ per-trajectory-ensemble populations for all defined states.
197
+
198
+
199
+ -----------------------------------------------------------------------------
200
+ Parallelization
201
+ -----------------------------------------------------------------------------
202
+
203
+ This tool supports parallelized binning, including reading/calculating input
204
+ data.
205
+
206
+
207
+ -----------------------------------------------------------------------------
208
+ Command-line options
209
+ -----------------------------------------------------------------------------
210
+ '''
211
+
212
def __init__(self):
    '''Create the tool's components and set default option values.'''
    super().__init__()

    # Make the parallel work manager the default. Not strictly required,
    # but it documents that this tool is meant to run in parallel.
    env = self.wm_env
    env.default_work_manager = env.default_parallel_work_manager

    # Components that contribute command-line arguments.
    self.data_reader = WESTDataReader()
    self.dssynth = WESTDSSynthesizer(default_dsname='pcoord')
    self.binning = BinMappingComponent()
    self.progress = ProgressIndicatorComponent()

    # Values filled in later by process_args()/go().
    self.output_file = None
    self.output_filename = None
    self.states = []
    self.subsample = False
227
+
228
def add_args(self, parser):
    '''Register this tool's command-line arguments on ``parser``.

    The data reader, binning and dataset-synthesizer components add their
    own arguments first; then the mutually exclusive macrostate options and
    the remaining "other options" are added.
    '''
    for component in (self.data_reader, self.binning, self.dssynth):
        component.add_args(parser)

    # Only one way of defining macrostates may be used at a time.
    state_group = parser.add_argument_group('macrostate definitions')
    exclusive = state_group.add_mutually_exclusive_group()
    exclusive.add_argument(
        '--states',
        nargs='+',
        metavar='STATEDEF',
        help='''Single-bin kinetic macrostate, specified by a coordinate tuple (e.g. '1.0' or '[1.0,1.0]'),
        optionally labeled (e.g. 'bound:[1.0,1.0]'). States corresponding to multiple bins
        must be specified with --states-from-file.''',
    )
    exclusive.add_argument(
        '--states-from-file',
        metavar='STATEFILE',
        help='''Load kinetic macrostates from the YAML file STATEFILE. See description
        above for the appropriate structure.''',
    )
    exclusive.add_argument(
        '--states-from-function',
        metavar='STATEFUNC',
        help='''Load kinetic macrostates from the function STATEFUNC, specified as
        module_name.func_name. This function is called with the bin mapper as an argument,
        and must return a list of dictionaries {'label': state_label, 'coords': 2d_array_like}
        one for each macrostate; the 'coords' entry must contain enough rows to identify all bins
        in the macrostate.''',
    )

    other_group = parser.add_argument_group('other options')
    other_group.add_argument(
        '-o', '--output', dest='output', default='assign.h5', help='''Store results in OUTPUT (default: %(default)s).'''
    )
    # store_const leaves the value as None when the flag is absent.
    other_group.add_argument(
        '--subsample',
        dest='subsample',
        action='store_const',
        const=True,
        help='''Determines whether or not the data should be subsampled.
        This is rather useful for analysing steady state simulations.''',
    )
    other_group.add_argument(
        '--config-from-file',
        dest='config_from_file',
        action='store_true',
        help='''Load bins/macrostates from a scheme specified in west.cfg.''',
    )
    other_group.add_argument('--scheme-name', dest='scheme', help='''Name of scheme specified in west.cfg.''')
277
+
278
def process_args(self, args):
    '''Apply parsed command-line arguments to the tool and its components.

    Opens the WEST data file to read the current iteration, configures the
    dataset synthesizer and (unless --config-from-file is used) the bin
    mapper, then loads macrostate definitions from whichever source was
    selected.

    Raises:
        ValueError: if --config-from-file is given without --scheme-name, or
            if exactly one macrostate is defined.
    '''
    self.progress.process_args(args)
    self.data_reader.process_args(args)
    # The file must be open to read the current iteration, which is needed
    # if the mapper stored in the file is to be used.
    self.data_reader.open(mode='r+')
    self.n_iter = self.data_reader.current_iteration
    # If we decide to use this option for iteration selection:
    # getattr(args,'bins_from_h5file',None) or self.data_reader.current_iteration

    with self.data_reader:
        self.dssynth.h5filename = self.data_reader.we_h5filename
        self.dssynth.process_args(args)
        if args.config_from_file is False:
            self.binning.set_we_h5file_info(self.n_iter, self.data_reader)
            self.binning.process_args(args)

    self.output_filename = args.output

    if args.config_from_file:
        if not args.scheme:
            raise ValueError('A scheme must be specified.')
        # May also replace self.output_filename and self.subsample.
        self.load_config_from_west(args.scheme)
    elif args.states:
        self.parse_cmdline_states(args.states)
    elif args.states_from_file:
        self.load_state_file(args.states_from_file)
    elif args.states_from_function:
        self.load_states_from_function(get_object(args.states_from_function, path=['.']))

    if self.states and len(self.states) < 2:
        raise ValueError('zero, two, or more macrostates are required')

    log.debug('state list: {!r}'.format(self.states))

    # --subsample is None when the flag is absent. Only override when it was
    # given, so a value loaded by load_config_from_west() is not discarded;
    # otherwise the default set in __init__ (False) stays in effect.
    if args.subsample is not None:
        self.subsample = args.subsample
316
+
317
def parse_cmdline_states(self, state_strings):
    '''Build ``self.states`` from ``--states`` command-line strings.

    A string containing exactly one ``:`` is split into ``label:coords``.
    Any other string is taken as coordinates only and labeled ``stateN``,
    N being its position in ``state_strings``.
    '''
    parsed = []
    for position, text in enumerate(state_strings):
        pieces = text.split(':')
        if len(pieces) == 2:
            label, coord_str = pieces
        else:
            # No label (or more than one ':'): the whole string is the coordinate.
            label = 'state{}'.format(position)
            coord_str = text
        parsed.append({'label': label, 'coords': parse_pcoord_value(coord_str)})
    self.states = parsed
328
+
329
def load_config_from_west(self, scheme):
    '''Load states, bins and output location for ``scheme`` from the WEST config.

    Reads ``westpa.rc.config['west']['analysis']``, takes the states and the
    first bin definition of ``analysis_schemes[scheme]``, optionally picks up
    ``subsample``, and points the output at
    ``<cwd>/<directory>/<scheme>/assign.h5``, creating that directory.

    Raises:
        ValueError: if the analysis section of the configuration cannot be read.
    '''
    try:
        config = westpa.rc.config['west']['analysis']
    except Exception as exc:
        raise ValueError('There is no configuration file specified.') from exc

    scheme_config = config['analysis_schemes'][scheme]
    self.states_from_dict(scheme_config['states'])

    # 'subsample' is optional; keep the current value when it is absent.
    try:
        self.subsample = config['subsample']
    except Exception:
        pass

    from westpa.core._rc import bins_from_yaml_dict

    self.binning.mapper = bins_from_yaml_dict(scheme_config['bins'][0])

    path = os.path.join(os.getcwd(), config['directory'], scheme)
    # makedirs creates the parent and the scheme directory in one step and
    # accepts either already existing. The previous two mkdir calls shared
    # one try block, so an existing parent stopped the scheme directory
    # from ever being created.
    try:
        os.makedirs(path, exist_ok=True)
    except OSError as exc:
        # Still best-effort, but no longer silent.
        log.warning('could not create analysis directory %r: %s', path, exc)

    self.output_filename = os.path.join(path, 'assign.h5')
351
+
352
def load_state_file(self, state_filename):
    '''Load macrostate definitions from the YAML file ``state_filename``.

    The file's top-level ``states`` entry is handed to ``states_from_dict``.
    '''
    import yaml

    # SECURITY NOTE(review): yaml.Loader can construct arbitrary Python
    # objects; only load state files from trusted sources (or consider
    # yaml.SafeLoader if the files never rely on Python-specific tags).
    # The context manager closes the file, which was previously left open.
    with open(state_filename, 'rt') as state_file:
        ydict = yaml.load(state_file, Loader=yaml.Loader)
    ystates = ydict['states']
    self.states_from_dict(ystates)
358
+
359
def states_from_dict(self, ystates):
    '''Build ``self.states`` from a sequence of state dictionaries.

    Each entry must have a ``coords`` key and may have a ``label`` key;
    entries without a label are named ``stateN`` by position. ``coords`` can be:

    - a scalar, in which case it is one bin, 1-D (stored with shape (1, 1))
    - a single list, which is rejected as ambiguous
    - a list of lists, which is a list of coordinate tuples

    Raises:
        ValueError: if ``coords`` is 1-D or has more than two dimensions.
    '''
    states = []
    for istate, ystate in enumerate(ystates):
        state = {'label': ystate.get('label', 'state{}'.format(istate))}
        coords = np.array(ystate['coords'])
        if coords.ndim == 0:
            # reshape() is used instead of assigning to ``coords.shape``,
            # which NumPy documents as discouraged.
            coords = coords.reshape((1, 1))
        elif coords.ndim == 1:
            raise ValueError(
                'list {!r} is ambiguous (list of 1-d coordinates, or single multi-d coordinate?)'.format(ystate['coords'])
            )
        elif coords.ndim > 2:
            raise ValueError('coordinates must be 2-D')
        state['coords'] = coords
        states.append(state)
    self.states = states
380
+
381
def load_states_from_function(self, statefunc):
    '''Obtain macrostates by calling ``statefunc`` with the bin mapper.

    Every returned state gets a default ``stateN`` label if it has none, and
    its ``coords`` entry is converted to a NumPy array in place.

    Raises:
        ValueError: if a returned state has no ``coords`` key.
    '''
    loaded = statefunc(self.binning.mapper)
    for position, entry in enumerate(loaded):
        entry.setdefault('label', 'state{}'.format(position))
        try:
            raw_coords = entry['coords']
        except KeyError:
            raise ValueError('state function {!r} returned a state {!r} without coordinates'.format(statefunc, entry))
        entry['coords'] = np.array(raw_coords)
    self.states = loaded
    log.debug('loaded states: {!r}'.format(self.states))
391
+
392
def assign_iteration(self, n_iter, nstates, nbins, state_map, last_labels):
    '''Assign one iteration's segments to bins using the work manager.

    The segments of iteration ``n_iter`` are cut into contiguous slices (one
    per worker, or a single slice when the work manager reports no worker
    count). Each slice is submitted as an ``_assign_label_pop`` task and the
    results are spliced back together as they complete.

    Returns:
        ``(assignments, trajlabels, pops, statelabels)`` for this iteration.
        The three label arrays have shape ``(nsegs, npts)``; ``pops`` has
        shape ``(nstates + 1, nbins + 1)`` and is the sum over all slices.
    '''
    iter_group = self.data_reader.get_iter_group(n_iter)
    nsegs, npts = iter_group['pcoord'].shape[:2]
    n_workers = self.work_manager.n_workers or 1
    assignments = np.empty((nsegs, npts), dtype=index_dtype)
    trajlabels = np.empty((nsegs, npts), dtype=index_dtype)
    statelabels = np.empty((nsegs, npts), dtype=index_dtype)
    pops = np.zeros((nstates + 1, nbins + 1), dtype=weight_dtype)

    # Ceiling division so every segment lands in some slice. Clamp to 1 so an
    # iteration with no segments gives an empty task list, not a zero step
    # for range().
    blocksize, remainder = divmod(nsegs, n_workers)
    if remainder > 0:
        blocksize += 1
    blocksize = max(blocksize, 1)

    def task_gen():
        # Lazily yield one (function, args, kwargs) task per segment slice.
        if __debug__:
            checkset = set()
        for lb in range(0, nsegs, blocksize):
            ub = min(nsegs, lb + blocksize)
            if __debug__:
                checkset.update(range(lb, ub))
            kwargs = dict(
                n_iter=n_iter,
                lb=lb,
                ub=ub,
                mapper=self.binning.mapper,
                nstates=nstates,
                state_map=state_map,
                last_labels=last_labels,
                parent_id_dsspec=self.data_reader.parent_id_dsspec,
                weight_dsspec=self.data_reader.weight_dsspec,
                pcoord_dsspec=self.dssynth.dsspec,
                subsample=self.subsample,
            )
            yield (_assign_label_pop, (), kwargs)

        # Debug-only sanity check that the slices cover every segment.
        if __debug__:
            assert checkset == set(range(nsegs)), 'segments missing: {}'.format(set(range(nsegs)) - checkset)

    for future in self.work_manager.submit_as_completed(task_gen(), queue_size=self.max_queue_len):
        assign_slice, traj_slice, slice_pops, lb, ub, state_slice = future.get_result(discard=True)
        assignments[lb:ub, :] = assign_slice
        trajlabels[lb:ub, :] = traj_slice
        statelabels[lb:ub, :] = state_slice
        pops += slice_pops
        # Drop references to the slice arrays before the next result arrives.
        del assign_slice, traj_slice, slice_pops, state_slice

    return (assignments, trajlabels, pops, statelabels)
451
+
452
def go(self):
    '''Assign every iteration to bins and write the results to the output file.

    Creates the output HDF5 file and writes:

    - attributes ``nbins``, ``nstates`` and ``subsampled``
    - ``bin_labels``, ``nsegs``, ``npts``, ``assignments`` and
      ``labeled_populations``
    - when macrostates are defined, also ``state_map``, ``state_labels``,
      ``trajlabels`` and ``statelabels``

    Iterations from 1 up to (not including) the data reader's current
    iteration are processed.
    '''
    assert self.data_reader.parent_id_dsspec._h5file is None
    assert self.data_reader.weight_dsspec._h5file is None
    if hasattr(self.dssynth.dsspec, '_h5file'):
        assert self.dssynth.dsspec._h5file is None
    pi = self.progress.indicator
    pi.operation = 'Initializing'
    with pi, self.data_reader, WESTPAH5File(self.output_filename, 'w', creating_program=True) as self.output_file:
        assign = self.binning.mapper.assign

        # We always assign the entire simulation, so that no trajectory appears to start
        # in a transition region that doesn't get initialized in one.
        iter_start = 1
        iter_stop = self.data_reader.current_iteration
        iter_count = iter_stop - iter_start

        h5io.stamp_iter_range(self.output_file, iter_start, iter_stop)

        nbins = self.binning.mapper.nbins
        self.output_file.attrs['nbins'] = nbins

        state_map = np.empty((nbins + 1,), index_dtype)
        state_map[:] = 0  # state_id == nstates => unknown state

        # Recursive mappers produce a generator rather than a list of labels
        # so consume the entire generator into a list.
        # np.bytes_ is used because np.string_ was removed in NumPy 2.0.
        labels = [np.bytes_(label) for label in self.binning.mapper.labels]

        self.output_file.create_dataset('bin_labels', data=labels, compression=9)

        if self.states:
            nstates = len(self.states)
            state_map[:] = nstates  # state_id == nstates => unknown state
            state_labels = [np.bytes_(state['label']) for state in self.states]

            for istate, sdict in enumerate(self.states):
                assert state_labels[istate] == np.bytes_(sdict['label'])  # sanity check
                # Every bin hit by one of the state's coordinates belongs to that state.
                state_assignments = assign(sdict['coords'])
                for assignment in state_assignments:
                    state_map[assignment] = istate
            self.output_file.create_dataset('state_map', data=state_map, compression=9, shuffle=True)
            self.output_file['state_labels'] = state_labels  # + ['(unknown)']
        else:
            nstates = 0
        self.output_file.attrs['nstates'] = nstates
        # Stamp if this has been subsampled.
        self.output_file.attrs['subsampled'] = self.subsample

        nsegs = np.empty((iter_count,), seg_id_dtype)
        npts = np.empty((iter_count,), seg_id_dtype)

        # scan for largest number of segments and largest number of points
        pi.new_operation('Scanning for segment and point counts', iter_count)
        for iiter, n_iter in enumerate(range(iter_start, iter_stop)):
            iter_group = self.data_reader.get_iter_group(n_iter)
            nsegs[iiter], npts[iiter] = iter_group['pcoord'].shape[0:2]
            pi.progress += 1
            del iter_group

        pi.new_operation('Preparing output')

        # create datasets
        self.output_file.create_dataset('nsegs', data=nsegs, shuffle=True, compression=9)
        self.output_file.create_dataset('npts', data=npts, shuffle=True, compression=9)

        max_nsegs = nsegs.max()
        max_npts = npts.max()

        # Sized for the largest iteration; unused cells keep the fill value.
        assignments_shape = (iter_count, max_nsegs, max_npts)
        assignments_dtype = np.min_scalar_type(nbins)
        assignments_ds = self.output_file.create_dataset(
            'assignments',
            dtype=assignments_dtype,
            shape=assignments_shape,
            compression=4,
            shuffle=True,
            chunks=h5io.calc_chunksize(assignments_shape, assignments_dtype),
            fillvalue=nbins,
        )
        if self.states:
            trajlabel_dtype = np.min_scalar_type(nstates)
            trajlabels_ds = self.output_file.create_dataset(
                'trajlabels',
                dtype=trajlabel_dtype,
                shape=assignments_shape,
                compression=4,
                shuffle=True,
                chunks=h5io.calc_chunksize(assignments_shape, trajlabel_dtype),
                fillvalue=nstates,
            )
            statelabels_ds = self.output_file.create_dataset(
                'statelabels',
                dtype=trajlabel_dtype,
                shape=assignments_shape,
                compression=4,
                shuffle=True,
                chunks=h5io.calc_chunksize(assignments_shape, trajlabel_dtype),
                fillvalue=nstates,
            )

        pops_shape = (iter_count, nstates + 1, nbins + 1)
        pops_ds = self.output_file.create_dataset(
            'labeled_populations',
            dtype=weight_dtype,
            shape=pops_shape,
            compression=4,
            shuffle=True,
            chunks=h5io.calc_chunksize(pops_shape, weight_dtype),
        )
        h5io.label_axes(pops_ds, [np.bytes_(i) for i in ['iteration', 'state', 'bin']])

        pi.new_operation('Assigning to bins', iter_count)
        last_labels = None  # mapping of seg_id to last macrostate inhabited
        for iiter, n_iter in enumerate(range(iter_start, iter_stop)):
            if iiter == 0:
                # No history yet: every segment starts in the unknown state.
                last_labels = np.empty((nsegs[iiter],), index_dtype)
                last_labels[:] = nstates  # unknown state

            # Slices this iteration into n_workers groups of segments, submits them to wm, splices results back together
            assignments, trajlabels, pops, statelabels = self.assign_iteration(n_iter, nstates, nbins, state_map, last_labels)

            # The final-timepoint labels seed the next iteration.
            last_labels = trajlabels[:, -1].copy()
            assignments_ds[iiter, 0 : nsegs[iiter], 0 : npts[iiter]] = assignments
            pops_ds[iiter] = pops
            if self.states:
                trajlabels_ds[iiter, 0 : nsegs[iiter], 0 : npts[iiter]] = trajlabels
                statelabels_ds[iiter, 0 : nsegs[iiter], 0 : npts[iiter]] = statelabels

            pi.progress += 1
            del assignments, trajlabels, pops, statelabels

        # 'statelabels' only exists when macrostates were defined; stamping it
        # unconditionally raised KeyError for runs without states.
        stamped = ['assignments', 'npts', 'nsegs', 'labeled_populations']
        if self.states:
            stamped.append('statelabels')
        for dsname in stamped:
            h5io.stamp_iter_range(self.output_file[dsname], iter_start, iter_stop)
589
+
590
+
591
def entry_point():
    '''Construct a ``WAssign`` tool and invoke its ``main()``.'''
    tool = WAssign()
    tool.main()


# Allow running this module directly as a script.
if __name__ == '__main__':
    entry_point()