treesak 1.53.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +113 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/C60SR4.nex +127 -0
  19. TreeSAK/CompareMCMC.py +138 -0
  20. TreeSAK/ConcateMSA.py +111 -0
  21. TreeSAK/ConvertMSA.py +135 -0
  22. TreeSAK/Dir.rb +82 -0
  23. TreeSAK/ExtractMarkerSeq.py +263 -0
  24. TreeSAK/FastRoot.py +1175 -0
  25. TreeSAK/FastRoot_backup.py +1122 -0
  26. TreeSAK/FigTree.py +34 -0
  27. TreeSAK/GTDB_tree.py +76 -0
  28. TreeSAK/GeneTree.py +142 -0
  29. TreeSAK/KEGG_Luo17.py +807 -0
  30. TreeSAK/LcaToLeaves.py +66 -0
  31. TreeSAK/MarkerRef2Tree.py +616 -0
  32. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  33. TreeSAK/MarkerSeq2Tree.py +299 -0
  34. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  35. TreeSAK/ModifyTopo.py +116 -0
  36. TreeSAK/Newick_tree_plotter.py +79 -0
  37. TreeSAK/OMA.py +170 -0
  38. TreeSAK/OMA2.py +212 -0
  39. TreeSAK/OneLineAln.py +50 -0
  40. TreeSAK/PB.py +155 -0
  41. TreeSAK/PMSF.py +115 -0
  42. TreeSAK/PhyloBiAssoc.R +84 -0
  43. TreeSAK/PhyloBiAssoc.py +167 -0
  44. TreeSAK/PlotMCMC.py +41 -0
  45. TreeSAK/PlotMcmcNode.py +152 -0
  46. TreeSAK/PlotMcmcNode_old.py +252 -0
  47. TreeSAK/RootTree.py +101 -0
  48. TreeSAK/RootTreeGTDB.py +371 -0
  49. TreeSAK/RootTreeGTDB214.py +288 -0
  50. TreeSAK/RootTreeGTDB220.py +300 -0
  51. TreeSAK/SequentialDating.py +16 -0
  52. TreeSAK/SingleAleHGT.py +157 -0
  53. TreeSAK/SingleLinePhy.py +50 -0
  54. TreeSAK/SliceMSA.py +142 -0
  55. TreeSAK/SplitScore.py +21 -0
  56. TreeSAK/SplitScore1.py +177 -0
  57. TreeSAK/SplitScore1OMA.py +148 -0
  58. TreeSAK/SplitScore2.py +608 -0
  59. TreeSAK/TaxaCountStats.R +256 -0
  60. TreeSAK/TaxonTree.py +47 -0
  61. TreeSAK/TreeSAK_config.py +32 -0
  62. TreeSAK/VERSION +164 -0
  63. TreeSAK/VisHPD95.R +45 -0
  64. TreeSAK/VisHPD95.py +200 -0
  65. TreeSAK/__init__.py +0 -0
  66. TreeSAK/ale_parser.py +74 -0
  67. TreeSAK/ale_splitter.py +63 -0
  68. TreeSAK/alignment_pruner.pl +1471 -0
  69. TreeSAK/assessOG.py +45 -0
  70. TreeSAK/batch_itol.py +171 -0
  71. TreeSAK/catfasta2phy.py +140 -0
  72. TreeSAK/cogTree.py +185 -0
  73. TreeSAK/compare_trees.R +30 -0
  74. TreeSAK/compare_trees.py +255 -0
  75. TreeSAK/dating.py +264 -0
  76. TreeSAK/dating_ss.py +361 -0
  77. TreeSAK/deltall.py +82 -0
  78. TreeSAK/do_rrtc.rb +464 -0
  79. TreeSAK/fa2phy.py +42 -0
  80. TreeSAK/filter_rename_ar53.py +118 -0
  81. TreeSAK/format_leaf_name.py +70 -0
  82. TreeSAK/gap_stats.py +38 -0
  83. TreeSAK/get_SCG_tree.py +742 -0
  84. TreeSAK/get_arCOG_seq.py +97 -0
  85. TreeSAK/global_functions.py +222 -0
  86. TreeSAK/gnm_leaves.py +43 -0
  87. TreeSAK/iTOL.py +791 -0
  88. TreeSAK/iTOL_gene_tree.py +80 -0
  89. TreeSAK/itol_msa_stats.py +56 -0
  90. TreeSAK/keep_highest_rrtc.py +37 -0
  91. TreeSAK/koTree.py +194 -0
  92. TreeSAK/label_gene_tree_by_gnm.py +34 -0
  93. TreeSAK/label_tree.R +75 -0
  94. TreeSAK/label_tree.py +121 -0
  95. TreeSAK/mad.py +708 -0
  96. TreeSAK/mcmc2tree.py +58 -0
  97. TreeSAK/mcmcTC copy.py +92 -0
  98. TreeSAK/mcmcTC.py +104 -0
  99. TreeSAK/mcmctree_vs_reltime.R +44 -0
  100. TreeSAK/mcmctree_vs_reltime.py +252 -0
  101. TreeSAK/merge_pdf.py +32 -0
  102. TreeSAK/pRTC.py +56 -0
  103. TreeSAK/parse_mcmctree.py +198 -0
  104. TreeSAK/parse_reltime.py +141 -0
  105. TreeSAK/phy2fa.py +37 -0
  106. TreeSAK/plot_distruibution_th.py +165 -0
  107. TreeSAK/prep_mcmctree_ctl.py +92 -0
  108. TreeSAK/print_leaves.py +32 -0
  109. TreeSAK/pruneMSA.py +63 -0
  110. TreeSAK/recode.py +73 -0
  111. TreeSAK/remove_bias.R +112 -0
  112. TreeSAK/rename_leaves.py +78 -0
  113. TreeSAK/replace_clade.py +55 -0
  114. TreeSAK/root_with_out_group.py +84 -0
  115. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  116. TreeSAK/subsample_drep_gnms.py +74 -0
  117. TreeSAK/subset.py +69 -0
  118. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  119. TreeSAK/supertree.py +330 -0
  120. TreeSAK/tmp_1.py +19 -0
  121. TreeSAK/tmp_2.py +19 -0
  122. TreeSAK/tmp_3.py +120 -0
  123. TreeSAK/tmp_4.py +43 -0
  124. TreeSAK/tmp_5.py +12 -0
  125. TreeSAK/weighted_rand.rb +23 -0
  126. treesak-1.53.3.data/scripts/TreeSAK +955 -0
  127. treesak-1.53.3.dist-info/LICENSE +674 -0
  128. treesak-1.53.3.dist-info/METADATA +27 -0
  129. treesak-1.53.3.dist-info/RECORD +131 -0
  130. treesak-1.53.3.dist-info/WHEEL +5 -0
  131. treesak-1.53.3.dist-info/top_level.txt +1 -0
TreeSAK/FastRoot.py ADDED
@@ -0,0 +1,1175 @@
1
+ import os
2
+ import re
3
+ import sys
4
+ import numpy
5
+ import cvxopt
6
+ import logging
7
+ import argparse
8
+ from numpy import *
9
+ from treeswift import *
10
+ from sys import stdin, stdout, argv, exit, stderr
11
+
12
+
13
+ ########################################################################################################################
14
+
15
# Program metadata for the bundled FastRoot implementation.
PROGRAM_NAME = "FastRoot"
PROGRAM_VERSION = "1.5"
PROGRAM_YEAR = "2017"
PROGRAM_LICENSE = "MIT License"
PROGRAM_INSTITUTE = "University of California at San Diego"
PROGRAM_AUTHOR = [
    "Uyen Mai",
    "Merve Kilic",
    "Erfan Sayyari",
    "Siavash Mirarab",
]
21
+
22
+
23
def new_logger(myName, myLevel=logging.INFO, myStream=stdout):
    """Return the logger called *myName*, configured to write to *myStream*.

    Parameters:
        myName: name passed to ``logging.getLogger``.
        myLevel: level set on the logger (default ``logging.INFO``).
        myStream: stream the handler writes to (default ``sys.stdout``).

    Returns:
        The configured ``logging.Logger``; propagation to ancestor loggers is
        switched off.
    """
    logger = logging.getLogger(myName)
    logger.setLevel(myLevel)

    # logging.getLogger returns the same object for the same name, so calling
    # this function twice would otherwise stack handlers and emit every record
    # several times.  Only attach a handler when none targets this stream yet.
    # (A plain loop is used on purpose: this file star-imports numpy, which
    # shadows the builtin ``any``.)
    already_attached = False
    for existing in logger.handlers:
        if (isinstance(existing, logging.StreamHandler)
                and getattr(existing, "stream", None) is myStream):
            already_attached = True
            break

    if not already_attached:
        handler = logging.StreamHandler(myStream)
        handler.setFormatter(logging.Formatter('%(levelname)s:%(name)s:%(message)s'))
        logger.addHandler(handler)

    logger.propagate = False
    return logger
33
+
34
+
35
+ ########################################################################################################################
36
+
37
+ '''
38
+ logger = logging.getLogger("Tree_extend.py")
39
+ logger.setLevel(logging.INFO)
40
+ handler = logging.StreamHandler(sys.stdout)
41
+ formatter = logging.Formatter('%(levelname)s:%(name)s:%(message)s')
42
+ handler.setFormatter(formatter)
43
+ logger.addHandler(handler)
44
+ logger.propagate = False
45
+ '''
46
+
47
+
48
class Tree_extend(object):
    """Base class shared by the rooting strategies in this module.

    It wraps a tree object (``self.ddpTree``) and provides the generic
    machinery: node labelling, a bottom-up and a top-down pass, outlier-branch
    filtering, re-rooting on an edge and Newick output.

    Subclasses are expected to provide ``Node_init``, ``bUp_update``,
    ``tDown_update``, ``prepare_root``, ``Opt_function``, ``reset``,
    ``compute_threshold`` and a ``self.logger`` attribute; this base class
    does not create a logger itself.
    """

    def __init__(self, ddpTree=None, tree_file=None, schema="newick"):
        """Wrap an existing tree, or read one from *tree_file* when given."""
        if tree_file:
            self.ddpTree = read_tree(tree_file, schema)
        else:
            self.ddpTree = ddpTree

    def Bottomup_label(self):
        """Set ``node.name`` to 'L<i>' (leaf) or 'I<i>' (internal) in post-order."""
        for i, node in enumerate(self.ddpTree.traverse_postorder()):
            prefix = 'L' if node.is_leaf() else 'I'
            node.name = prefix + str(i)

    def Topdown_label(self, label_type="all"):
        """Set ``node.name`` for every node in pre-order.

        With ``label_type`` "all", "leaves" or "internal" the selected nodes
        get a generated name ('L<i>' / 'I<i>'); all other nodes get their
        existing ``label`` copied into ``name``.
        """
        for i, node in enumerate(self.ddpTree.traverse_preorder()):
            if node.is_leaf():
                generate = label_type == "all" or label_type == "leaves"
                prefix = 'L'
            else:
                generate = label_type == "all" or label_type == "internal"
                prefix = 'I'
            node.name = prefix + str(i) if generate else node.label

    def Bottomup_update(self):
        """Initialise every node and run the subclass's bottom-up step (post-order)."""
        for node in self.ddpTree.traverse_postorder():
            self.Node_init(node)
            self.bUp_update(node)

    def Topdown_update(self):
        """Run the subclass's top-down step on every node (pre-order)."""
        for node in self.ddpTree.traverse_preorder():
            self.tDown_update(node, self.Opt_function)

    def compute_distances(self):
        """Return a dict mapping each leaf's ``name`` to its distance from the root."""
        D = {}

        def walk(node, cumm_l):
            if node.is_leaf():
                D[node.name] = cumm_l
                return
            for child in node.child_nodes():
                walk(child, cumm_l + child.edge_length)

        walk(self.ddpTree.root, 0)
        return D

    def compute_ingroup_distances(self):
        """Return the leaf distances measured from the root's second child."""
        D = []

        def walk(node, cumm_l):
            if node.is_leaf():
                D.append(cumm_l)
                return
            for child in node.child_nodes():
                walk(child, cumm_l + child.edge_length)

        children = self.ddpTree.root.child_nodes()

        # NOTE(review): the child with the most leaves is determined here but
        # the traversal below starts from children[1] regardless.  This looks
        # unintended; kept as-is pending confirmation of the intended ingroup.
        crowded_child = None
        maxleaf = -1
        for node in children:
            if node.nleaf > maxleaf:
                maxleaf = node.nleaf
                crowded_child = node

        walk(children[1], 0)
        return D

    def filter_branch(self, threshold=None):
        """Repeatedly re-root and remove abnormally long branches.

        Stops as soon as a filtering round removes nothing.
        """
        i = 1
        self.logger.info("Iteration: " + str(i))
        self.Reroot()
        while True:
            if not self.filter_by_threshold(threshold=threshold):
                self.logger.info("I could not remove anything more! I stop here!")
                break
            i += 1
            self.logger.info("Iteration: " + str(i))
            self.reset()
            self.Reroot()

    def filter_by_threshold(self, threshold=None, k=3.5):
        """Remove every node whose root distance exceeds *threshold*.

        When *threshold* is None it is obtained from
        ``self.compute_threshold(k=k)``.  Internal nodes left without children
        are removed as well, and nodes left with a single child are spliced
        out (the two edge lengths are added).

        Returns:
            True if at least one node was removed, otherwise False.
        """
        if threshold is None:
            threshold = self.compute_threshold(k=k)

        def prune(node, cumm_l):
            removed = False
            node.child_removed = False
            for child in node.child_nodes():
                check = prune(child, cumm_l + child.edge_length)
                removed = removed or check

            # `parent` / `num_children()` are the accessors used everywhere
            # else in this class (see reroot_at_edge); the previous
            # `parent_node` / `num_child_nodes()` spellings were not.
            p = node.parent
            if (cumm_l > threshold) or (node.child_removed and node.num_children() == 0):
                p.remove_child(node)
                p.child_removed = True
                removed = True
                try:
                    self.logger.info(node.label + " removed")
                except (TypeError, AttributeError):
                    # label missing or not a string: fall back to the generated name
                    self.logger.info(node.name + " removed")
            elif node.num_children() == 1:
                # splice the node out and hang its only child on its parent
                e1 = node.edge_length
                child = node.child_nodes()[0]
                e2 = child.edge_length
                p.remove_child(node)
                node.remove_child(child)
                p.add_child(child)
                child.edge_length = e1 + e2
            return removed

        return prune(self.get_root(), 0)

    def compute_threshold(self, k=3.5):
        """Placeholder; subclasses supporting branch filtering override this."""
        self.logger.warning("Abstract class! Should never be called")
        return 0

    def compute_threhold(self, k=3.5):
        """Misspelled legacy name kept for backward compatibility."""
        return self.compute_threshold(k=k)

    def reset(self):
        """Placeholder; subclasses reset their optimisation state here."""
        self.logger.warning("Abstract class! Should never be called")

    def find_root(self):
        """Label the tree, then run the bottom-up and top-down passes."""
        self.Topdown_label()  # temporarily included for debugging
        self.Bottomup_update()
        self.prepare_root()
        self.Topdown_update()

    def opt_score(self):
        """Placeholder; subclasses return their optimal score."""
        self.logger.warning("Abstract class! Should never be called")

    def report_score(self):
        """Placeholder; subclasses return a printable score."""
        self.logger.warning("Abstract class! Should never be called")

    def Reroot(self):
        """Find the optimal root and, if it differs from the current one, re-root there."""
        self.find_root()
        if self.opt_root != self.ddpTree.root:
            self.reroot_at_edge(self.opt_root, self.opt_x)

    def Opt_function(self, node):
        """Placeholder; subclasses evaluate *node* as a root candidate."""
        self.logger.warning("Abstract method! Should never be called")

    def tree_as_newick(self, outstream=sys.stdout, label_by_name=False):
        """Write the tree to *outstream* in Newick format, terminated by ';' and a newline.

        With ``label_by_name`` the generated ``name`` of each node is written
        instead of its ``label``.
        """
        self.__write_newick(self.ddpTree.root, outstream, label_by_name=label_by_name)
        outstream.write(";\n")

    def __write_newick(self, node, outstream, label_by_name=False):
        """Recursively write the subtree under *node* in Newick format."""
        if node.is_leaf():
            if label_by_name:
                outstream.write(str(node.name))
            else:
                # Labels are not guaranteed to be strings; the previous
                # try/except simply repeated the same failing write.
                outstream.write(str(node.label))
        else:
            outstream.write('(')
            is_first_child = True
            for child in node.child_nodes():
                if is_first_child:
                    is_first_child = False
                else:
                    outstream.write(',')
                self.__write_newick(child, outstream, label_by_name=label_by_name)
            outstream.write(')')

            if label_by_name:
                outstream.write(str(node.name))
            elif node.label is not None:
                outstream.write(str(node.label))

        if node.edge_length is not None:
            outstream.write(":" + str(node.edge_length))

    def reroot_at_edge(self, node, length):
        """Re-root the tree on the edge above *node*.

        The new root is placed so that *node* hangs from it with edge length
        *length* and its former parent with the remainder of the original
        edge.  Parent/child links on the path up to the old root are reversed,
        and an old root left with a single child is spliced out.

        Returns:
            ``(d2currRoot, br2currRoot)`` accumulated while walking up to the
            old root; ``(0, 0)`` when *node* is a leaf and *length* is 0;
            ``None`` when *node* is falsy or has no parent.
        """
        # Guard first: the original read node.edge_length before this check.
        if not node:
            return
        length1 = node.edge_length - length
        length2 = length

        head = node  # opt_root = v = node
        tail = node.parent  # u parent of opt_root
        if not tail:
            return

        if (length2 == 0) and head.is_leaf():
            return 0, 0

        new_root = Node()

        tail.remove_child(head)

        new_root.add_child(head)
        head.edge_length = length2

        p = tail.parent
        l = tail.edge_length

        new_root.add_child(tail)
        tail.edge_length = length1

        br2currRoot = 0
        d2currRoot = length1

        if tail is self.ddpTree.root:
            head = new_root

        # Walk up to the old root, reversing each parent/child link.
        while tail is not self.ddpTree.root:
            # remember head's current parent; it is restored after the
            # remove_child call below
            q = head.parent
            head = tail
            tail = p
            p = tail.parent

            br2currRoot += 1
            d2currRoot += l

            l1 = tail.edge_length
            tail.remove_child(head)
            head.parent = q

            head.add_child(tail)
            tail.edge_length = l
            l = l1

        # out of while loop: tail IS now tree.root
        if tail.num_children() == 1:
            # merge the 2 branches of the old root and adjust the branch length
            sis = tail.child_nodes()[0]
            l = sis.edge_length
            tail.remove_child(sis)
            head.add_child(sis)
            sis.edge_length = l + tail.edge_length
            head.remove_child(tail)

        new_root.name = self.ddpTree.root.name
        self.ddpTree.root.name = "OLD"
        self.ddpTree.root = new_root

        return d2currRoot, br2currRoot

    def get_root(self):
        """Return the root node of the wrapped tree."""
        return self.ddpTree.root
346
+
347
+
348
class OGR_Tree(Tree_extend):
    """Outgroup rooting (OGR = outgroup reroot).

    Handles multiple outgroups that need not be monophyletic: the root is
    placed where the triplet score of the specified outgroups is maximal.
    """

    def __init__(self, outgroups, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=sys.stderr):
        """Record which leaf labels are outgroups and the maximum triplet count.

        Parameters:
            outgroups: iterable of leaf labels to treat as outgroups.
            ddpTree, tree_file, schema: forwarded to ``Tree_extend``.
            logger_id: suffix of the logger name ("OGR_Tree_<id>").
            logger_stream: stream the logger writes to.
        """
        super(OGR_Tree, self).__init__(ddpTree, tree_file, schema)
        self.logger = new_logger("OGR_Tree_" + str(logger_id), myStream=logger_stream)

        # Build the lookup set once.  It used to be rebuilt for every leaf,
        # which is quadratic and silently empties a one-shot iterator after
        # the first leaf.
        requested = set(outgroups)
        n_leaves = 0
        present = set()
        for leaf in self.ddpTree.traverse_leaves():
            n_leaves += 1
            if leaf.label in requested:
                present.add(leaf.label)

        self.OGs = present
        self.nOGs = len(self.OGs)
        self.nIGs = n_leaves - self.nOGs
        self.max_nTrpls = self.nIGs * self.nOGs * (self.nOGs - 1) / 2 + self.nOGs * self.nIGs * (self.nIGs - 1) / 2
        self.reset()

    def reset(self):
        """Forget the best root found so far."""
        self.opt_root = self.ddpTree.root
        self.opt_nTrpls = 0

    def Node_init(self, node, nTrpl_in=0, nTrpl_out=0, nOGs=0, nIGs=0):
        """Attach the per-node counters used by the two passes."""
        node.nTrpl_in = nTrpl_in
        node.nTrpl_out = nTrpl_out
        node.nOGs = nOGs
        node.nIGs = nIGs

    def Opt_function(self, node):
        """Keep *node* as the best root if its triplet count beats the current best."""
        curr_nTrpls = node.nTrpl_in + node.nTrpl_out
        if curr_nTrpls > self.opt_nTrpls:
            self.opt_nTrpls = curr_nTrpls
            self.opt_root = node
            # NOTE: this method does not consider branch length, the *middle
            # point* of the edge is just arbitrarily chosen
            self.opt_x = node.edge_length / 2

    def bUp_update(self, node):
        """Bottom-up step: fill ``nOGs``, ``nIGs`` and ``nTrpl_in`` of *node*."""
        if node.is_leaf():
            node.nOGs = 1 if node.label in self.OGs else 0
            node.nIGs = 1 if node.nOGs == 0 else 0
            return

        C = node.child_nodes()

        # sum() is given lists on purpose: the module star-imports numpy, whose
        # sum does not accept generators the way the builtin does.
        node.nOGs = sum([c.nOGs for c in C])
        node.nIGs = sum([c.nIGs for c in C])
        node.nTrpl_in = sum([c.nTrpl_in for c in C])

        # every unordered pair of children contributes triplets with the
        # leaves outside this node
        for i, c1 in enumerate(C):
            for c2 in C[i + 1:]:
                IG_trpls = c1.nIGs * c2.nIGs * (self.nOGs - node.nOGs)
                OG_trpls = c1.nOGs * c2.nOGs * (self.nIGs - node.nIGs)
                node.nTrpl_in += IG_trpls + OG_trpls

    def tDown_update(self, node, opt_function):
        """Top-down step: fill ``nTrpl_out`` of each child, then evaluate it."""
        C = node.child_nodes()

        for child in C:
            siblings = [c for c in C if c is not child]
            child.nTrpl_out = node.nTrpl_out

            for i, c1 in enumerate(siblings):
                child.nTrpl_out += c1.nTrpl_in
                child.nTrpl_out += (self.nIGs - node.nIGs) * c1.nIGs * child.nOGs
                child.nTrpl_out += (self.nOGs - node.nOGs) * c1.nOGs * child.nIGs

                for c2 in siblings[i + 1:]:
                    IG_trpls = c1.nIGs * c2.nIGs * child.nOGs
                    OG_trpls = c1.nOGs * c2.nOGs * child.nIGs
                    child.nTrpl_out += IG_trpls + OG_trpls

            opt_function(child)

    def prepare_root(self):
        """Nothing to prepare for outgroup rooting."""
        pass

    def opt_score(self):
        """Return the best triplet count as a fraction of the maximum, or None if the maximum is 0."""
        return self.opt_nTrpls / float(self.max_nTrpls) if self.max_nTrpls != 0 else None

    def report_score(self):
        """Return the printable score; warn when no score could be computed."""
        myScore = self.opt_score()
        if myScore is None:
            self.logger.warning("OG rooting failed because the tree has no outgroup")
        return "Triplet score: " + str(myScore)
432
+
433
+
434
class MPR_Tree(Tree_extend):
    """Midpoint rooting (MPR = mid point reroot)."""

    def __init__(self, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=sys.stderr):
        """Set up the tree and a logger named "MPR_Tree_<logger_id>"."""
        super(MPR_Tree, self).__init__(ddpTree, tree_file, schema)
        self.logger = new_logger("MPR_Tree_" + str(logger_id), myStream=logger_stream)
        self.reset()

    def reset(self):
        """Forget the best root found so far."""
        self.max_distance = -1
        self.opt_root = self.ddpTree.root
        self.opt_x = 0

    def Node_init(self, node, max_in=None, max_out=-1):
        """Attach ``max_in`` (defaults to ``[0, 0]``) and ``max_out`` to *node*."""
        node.max_in = max_in if max_in else [0, 0]
        node.max_out = max_out

    def Opt_function(self, node):
        """Keep the edge above *node* if the longest path through it is the longest so far.

        The candidate is accepted only when the balancing offset ``x`` lies
        on the edge, i.e. ``0 <= x <= node.edge_length``.
        """
        m = max(node.max_in)
        curr_max_distance = m + node.max_out
        x = (node.max_out - m) / 2
        if curr_max_distance > self.max_distance and x >= 0 and x <= node.edge_length:
            self.max_distance = curr_max_distance
            self.opt_x = x
            self.opt_root = node

    def bUp_update(self, node):
        """Bottom-up step: for an internal node, store one entry per child in ``max_in``."""
        if node.is_leaf():
            return
        node.max_in = []
        for child in node.child_nodes():
            node.max_in.append(max(child.max_in) + child.edge_length)

    def tDown_update(self, node, opt_function):
        """Top-down step: set ``max_out`` of each child, then evaluate the child."""
        for child_idx, child in enumerate(node.child_nodes()):
            # node.max_in[k] belongs to the k-th child; skip the child's own entry
            others = [d for k, d in enumerate(node.max_in) if k != child_idx]
            child.max_out = max([node.max_out] + others) + child.edge_length
            opt_function(child)

    def prepare_root(self):
        """Nothing to prepare for midpoint rooting."""
        pass

    def compute_threshold(self, k=3.5):
        """Branch filtering is not supported here: warn and return 0.

        This is the name ``filter_by_threshold`` calls; the override used to
        exist only under the misspelled name below.
        """
        self.logger.warning("Trying to compute threshold for MPR_Tree, which is not supported.")
        return 0

    def compute_threhold(self, k=3.5):
        """Misspelled legacy name kept for backward compatibility."""
        return self.compute_threshold(k=k)

    def opt_score(self):
        """Return half of the longest path length found."""
        return self.max_distance / 2

    def report_score(self):
        """Return the printable tree height."""
        return "Tree height: " + str(self.opt_score())
485
+
486
+
487
+ ########################################################################################################################
488
+
489
+ '''
490
+ logger = logging.getLogger("MinVar")
491
+ logger.setLevel(logging.INFO)
492
+ handler = logging.StreamHandler(sys.stdout)
493
+ formatter = logging.Formatter('%(levelname)s:%(name)s:%(message)s')
494
+ handler.setFormatter(formatter)
495
+ logger.addHandler(handler)
496
+ logger.propagate = False
497
+ '''
498
+
499
class minVAR_Base_Tree(Tree_extend):
    """Common machinery for the minimum-variance rooting variants.

    Subclasses supply ``Opt_function(node, a, b, c)``, which receives the
    coefficients of the quadratic computed in ``Update_var`` for the edge
    above ``node``.
    """

    def __init__(self, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=sys.stderr):
        """Set up the tree and a logger named "MinVar_Tree_<logger_id>"."""
        super(minVAR_Base_Tree, self).__init__(ddpTree, tree_file, schema)
        logger_name = "MinVar_Tree_" + str(logger_id)
        self.logger = new_logger(logger_name, myStream=logger_stream)
        self.reset()

    def reset(self):
        """Forget the best root found so far."""
        self.minVAR = None
        self.opt_root = self.ddpTree.root
        self.opt_x = 0

    def Node_init(self, node, nleaf=1, sum_in=0, sum_total=0, var=-1):
        """Attach the per-node fields used by the two passes."""
        node.sum_in = sum_in
        node.sum_total = sum_total
        node.nleaf = nleaf
        node.var = var

    def Opt_function(self, node, a, b, c):
        """Placeholder; the concrete variants override this."""
        self.logger.info("Abstract method! Should never be called")

    def compute_dRoot_VAR(self):
        """Store the variance of the root-to-leaf distances in ``root.var``."""
        totals = {'ssq': 0, 'sum': 0}

        def visit(node, depth):
            if not node.is_leaf():
                for child in node.child_nodes():
                    visit(child, depth + child.edge_length)
                return
            totals['ssq'] += depth ** 2
            totals['sum'] += depth

        visit(self.get_root(), 0)
        N = self.get_root().nleaf
        # mean of squares minus square of mean
        self.get_root().var = totals['ssq'] / N - (totals['sum'] / N) ** 2

    def bUp_update(self, node):
        """Bottom-up step: fill ``nleaf`` and ``sum_in`` of *node* from its children."""
        if node.is_leaf():
            node.nleaf = 1
            node.sum_in = 0
            return

        node.nleaf = 0
        node.sum_in = 0
        for child in node.child_nodes():
            node.nleaf += child.nleaf
            node.sum_in += child.sum_in + child.nleaf * child.edge_length

    def Update_var(self, child, node, edge_length):
        """Set ``child.var`` and return the quadratic coefficients ``(a, b, c)``.

        ``child.var`` is the quadratic evaluated at *edge_length*; ``c`` is
        ``node.var``.
        """
        alpha = 2 * (node.sum_total - 2 * (child.sum_in + child.nleaf * edge_length)) / self.total_leaves
        beta = 1 - 2 * float(child.nleaf) / self.total_leaves
        a = 1 - beta * beta
        b = alpha - 2 * node.sum_total * beta / self.total_leaves
        c = node.var
        child.var = a * edge_length * edge_length + b * edge_length + c
        return a, b, c

    def tDown_update(self, node, opt_function):
        """Top-down step: update ``sum_total``/``var`` of each child, then evaluate it."""
        for child in node.child_nodes():
            shift = (self.total_leaves - 2 * child.nleaf) * child.edge_length
            child.sum_total = node.sum_total + shift
            coeffs = self.Update_var(child, node, child.edge_length)
            opt_function(child, *coeffs)

    def prepare_root(self):
        """Initialise the root's totals before the top-down pass."""
        root = self.get_root()
        root.sum_total = root.sum_in
        self.compute_dRoot_VAR()
        self.total_leaves = root.nleaf

    def opt_score(self):
        """Return the best score found (None if nothing was evaluated)."""
        return self.minVAR

    def report_score(self):
        """Return the printable score."""
        return "MinVar score: " + str(self.opt_score())
573
+
574
+
575
class MVDF_Tree(minVAR_Base_Tree):
    """VAR-reroot variant: deepest node + factorization."""

    def __init__(self, ddpTree=None, tree_file=None, schema="newick"):
        super(MVDF_Tree, self).__init__(ddpTree, tree_file, schema)
        self.deep_node = None

    def reset(self):
        """Forget the best root and the deepest node found so far."""
        super(MVDF_Tree, self).reset()
        self.deep_node = None

    def Opt_function(self, node, a, b, c):
        """Evaluate the edge above *node* using the quadratic ``a*x^2 + b*x + c``.

        The candidate is considered only when the vertex of the quadratic lies
        on the edge.  Preference goes to the edge whose lower-variance end
        point ("deep node") has the smallest ``var``; among edges sharing that
        deep node, to the smallest factorized variance.
        """
        if a == 0:
            # Update_var yields a == 0 when node.nleaf is 0 or equals
            # total_leaves; the quadratic then has no interior minimum and
            # both divisions below would raise ZeroDivisionError.
            return

        x = -b / (2 * a)
        if x >= 0 and x <= node.edge_length:
            factor = float(node.nleaf) / self.total_leaves
            factor = factor * (1 - factor)
            curr_minVAR = (a * x * x + b * x + c) / factor

            # `parent` is the attribute the rest of this module uses
            # (see Tree_extend.reroot_at_edge).
            parent = node.parent
            if node.var < parent.var:
                deep_node = node
            else:
                deep_node = parent

            updateNeed = False
            if (self.deep_node is None) or (deep_node.var < self.deep_node.var):
                self.deep_node = deep_node
                self.minVAR = curr_minVAR
                updateNeed = True
            elif (self.deep_node is deep_node) and (curr_minVAR < self.minVAR):
                self.minVAR = curr_minVAR
                updateNeed = True

            if updateNeed:
                self.opt_root = node
                self.opt_x = node.edge_length - x

    def compute_threshold(self, k=3.5):
        """Return ``mean + k * std``, derived from the optimal root's statistics.

        Should be called only AFTER the MV root was found.
        """
        # Imported locally: `math` was previously only in scope through
        # `from numpy import *`, which NumPy 2.0 no longer provides.
        import math

        mean = (self.opt_root.sum_total - self.opt_x *
                (self.total_leaves - 2 * self.opt_root.nleaf)) / self.total_leaves
        factor = float(self.opt_root.nleaf) / self.total_leaves
        factor = factor * (1 - factor)
        rootVar = self.minVAR * factor  # undo the factorization applied in Opt_function
        self.logger.info(mean)
        self.logger.info(rootVar)
        std = math.sqrt(rootVar)
        return mean + k * std
627
+
628
+
629
class MVD0_Tree(minVAR_Base_Tree):
    """VAR-reroot variant: deepest node + no factorization."""

    def __init__(self, ddpTree=None, tree_file=None, schema="newick"):
        super(MVD0_Tree, self).__init__(ddpTree, tree_file, schema)
        self.deep_node = None

    def reset(self):
        """Forget the best root and the deepest node found so far."""
        super(MVD0_Tree, self).reset()
        self.deep_node = None

    def Opt_function(self, node, a, b, c):
        """Evaluate the edge above *node* using the quadratic ``a*x^2 + b*x + c``.

        The candidate is considered only when the vertex of the quadratic lies
        on the edge.  Preference goes to the edge whose lower-variance end
        point ("deep node") has the smallest ``var``; among edges sharing that
        deep node, to the smallest variance at the vertex.
        """
        if a == 0:
            # Update_var yields a == 0 when node.nleaf is 0 or equals
            # total_leaves; the quadratic then has no interior minimum and
            # the division below would raise ZeroDivisionError.
            return

        x = -b / (2 * a)
        if x >= 0 and x <= node.edge_length:
            curr_minVAR = a * x * x + b * x + c

            # `parent` is the attribute the rest of this module uses
            # (see Tree_extend.reroot_at_edge).
            parent = node.parent
            if node.var < parent.var:
                deep_node = node
            else:
                deep_node = parent

            updateNeed = False
            if (self.deep_node is None) or (deep_node.var < self.deep_node.var):
                self.deep_node = deep_node
                self.minVAR = curr_minVAR
                updateNeed = True
            elif (self.deep_node is deep_node) and (curr_minVAR < self.minVAR):
                self.minVAR = curr_minVAR
                updateNeed = True

            if updateNeed:
                self.opt_root = node
                self.opt_x = node.edge_length - x

    def compute_threshold(self, k=3.5):
        """Return ``mean + k * std``, derived from the optimal root's statistics.

        Should be called only AFTER the MV root was found.
        """
        # Imported locally: `math` was previously only in scope through
        # `from numpy import *`, which NumPy 2.0 no longer provides.
        import math

        mean = (self.opt_root.sum_total - self.opt_x *
                (self.total_leaves - 2 * self.opt_root.nleaf)) / self.total_leaves
        self.logger.info(mean)
        self.logger.info(self.minVAR)
        std = math.sqrt(self.minVAR)
        return mean + k * std
675
+
676
+
677
class MV0F_Tree(minVAR_Base_Tree):
    """VAR-reroot variant: no deepest node + factorization."""

    def Opt_function(self, node, a, b, c):
        """Keep the edge above *node* if its factorized minimum variance is the smallest so far.

        The candidate is considered only when the vertex of the quadratic
        ``a*x^2 + b*x + c`` lies on the edge.
        """
        if a == 0:
            # Update_var yields a == 0 when node.nleaf is 0 or equals
            # total_leaves; the quadratic then has no interior minimum and
            # both divisions below would raise ZeroDivisionError.
            return

        x = -b / (2 * a)
        if x >= 0 and x <= node.edge_length:
            factor = float(node.nleaf) / self.total_leaves
            factor = factor * (1 - factor)
            curr_minVAR = (a * x * x + b * x + c) / factor
            if self.minVAR is None or curr_minVAR < self.minVAR:
                self.minVAR = curr_minVAR
                self.opt_root = node
                self.opt_x = node.edge_length - x

    def compute_threshold(self, k=3.5):
        """Return ``mean + k * std``, derived from the optimal root's statistics.

        Should be called only AFTER the MV root was found.
        """
        # Imported locally: `math` was previously only in scope through
        # `from numpy import *`, which NumPy 2.0 no longer provides.
        import math

        mean = (self.opt_root.sum_total - self.opt_x *
                (self.total_leaves - 2 * self.opt_root.nleaf)) / self.total_leaves
        factor = float(self.opt_root.nleaf) / self.total_leaves
        factor = factor * (1 - factor)
        rootVar = self.minVAR * factor  # undo the factorization applied in Opt_function
        self.logger.info(mean)
        self.logger.info(rootVar)
        std = math.sqrt(rootVar)
        return mean + k * std
710
+
711
+
712
class MV00_Tree(minVAR_Base_Tree):
    """VAR-reroot variant: no deepest node + no factorization."""

    def Opt_function(self, node, a, b, c):
        """Keep the edge above *node* if its minimum variance is the smallest so far.

        The candidate is considered only when the vertex of the quadratic
        ``a*x^2 + b*x + c`` lies on the edge.
        """
        if a == 0:
            # Update_var yields a == 0 when node.nleaf is 0 or equals
            # total_leaves; the quadratic then has no interior minimum and
            # the division below would raise ZeroDivisionError.
            return

        x = -b / (2 * a)
        if x >= 0 and x <= node.edge_length:
            curr_minVAR = a * x * x + b * x + c
            if self.minVAR is None or curr_minVAR < self.minVAR:
                self.minVAR = curr_minVAR
                self.opt_root = node
                self.opt_x = node.edge_length - x

    def compute_threshold(self, k=3.5):
        """Return ``mean + k * std``, derived from the optimal root's statistics.

        Should be called only AFTER the MV root was found.
        """
        # Imported locally: `math` was previously only in scope through
        # `from numpy import *`, which NumPy 2.0 no longer provides.
        import math

        mean = (self.opt_root.sum_total - self.opt_x *
                (self.total_leaves - 2 * self.opt_root.nleaf)) / self.total_leaves
        self.logger.info(mean)
        self.logger.info(self.minVAR)
        std = math.sqrt(self.minVAR)
        return mean + k * std
734
+
735
+
736
+ class MBR_Tree(Tree_extend):
737
+ # supportive class to implement midpoint balance root
738
def __init__(self, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=sys.stderr):
    """Set up the tree, a logger and an empty list of balance points.

    Parameters:
        ddpTree, tree_file, schema: forwarded to ``Tree_extend``.
        logger_id: suffix of the logger name ("MBR_Tree_<id>").
        logger_stream: stream the logger writes to.
    """
    super(MBR_Tree, self).__init__(ddpTree, tree_file, schema)
    # list_balance_points logs through self.logger, which was never created
    # for this class; build it the same way the sibling tree classes do.
    self.logger = new_logger("MBR_Tree_" + str(logger_id), myStream=logger_stream)

    self.BPs = []  # BPs : balance points
    self.opt_root = self.ddpTree.root
    self.opt_x = 0
744
+
745
def Node_init(self, node, nleaf=1, sum_in=0, sum_out=-1):
    """Attach the per-node fields ``nleaf``, ``sum_in`` and ``sum_out`` to *node*.

    The values were previously written onto the tree wrapper (``self``)
    instead of the node being initialised.
    """
    node.nleaf = nleaf
    node.sum_in = sum_in
    node.sum_out = sum_out
749
+
750
def Opt_function(self, node):
    """Record a balance point on the edge above *node*, if there is one.

    ``x`` is half the difference between the mean of ``sum_out`` over the
    leaves outside *node* and the mean of ``sum_in`` over the leaves below
    it.  When ``0 <= x <= node.edge_length`` the tuple
    ``(node, x, mean_in + x)`` is appended to ``self.BPs`` and mirrored on
    the node as ``node.x`` / ``node.mean``; otherwise both are set to None.
    """
    inside = node.nleaf
    outside = self.total_leaves - inside
    mean_in = node.sum_in / inside
    mean_out = node.sum_out / outside
    x = (mean_out - mean_in) / 2

    if not (x >= 0 and x <= node.edge_length):
        node.x = None
        node.mean = None
        return

    self.BPs.append((node, x, mean_in + x))
    node.x = x
    node.mean = mean_in + x
762
+
763
def bUp_update(self, node):
    """Bottom-up step: fill ``nleaf`` and ``sum_in`` of *node* from its children.

    A leaf gets ``nleaf = 1`` and ``sum_in = 0``.
    """
    node.sum_in = 0
    if node.is_leaf():
        node.nleaf = 1
        return

    node.nleaf = 0
    for kid in node.child_nodes():
        node.nleaf += kid.nleaf
        node.sum_in += kid.sum_in + kid.nleaf * kid.edge_length
772
+
773
def tDown_update(self, node, opt_function):
    """Top-down step: set ``sum_out`` of each child, then evaluate the child.

    Parameters:
        node: node whose children are updated; its ``sum_in`` and
            ``sum_out`` must already be set.
        opt_function: callable invoked with each child after its
            ``sum_out`` has been computed.
    """
    # The former `child_idx` counter was incremented but never read.
    for child in node.child_nodes():
        child.sum_out = (node.sum_out + node.sum_in + child.edge_length *
                         (self.total_leaves - 2 * child.nleaf) - child.sum_in)
        opt_function(child)
780
+
781
def prepare_root(self):
    """Initialise the root before the top-down pass.

    Records the root's leaf count as ``self.total_leaves``, sets the root's
    ``sum_out`` to 0 and gives it no balance point (``x`` and ``mean`` are
    None).
    """
    top = self.get_root()
    top.sum_out = 0
    self.total_leaves = top.nleaf
    top.x = top.mean = None
787
+
788
+ def list_balance_points(self):
789
+ self.Topdown_label()
790
+ self.Bottomup_update()
791
+ self.prepare_root()
792
+ self.Topdown_update()
793
+
794
+ for (node, x, mean) in self.BPs:
795
+ if node.is_leaf():
796
+ # self.logger.info(node.label + "\t" + str(x) + "\t" + str(mean))
797
+ self.logger.info(node.label + "\t" + str(x) + "\t" + str(mean))
798
+ else:
799
+ self.logger.info(node.label + "\t" + str(x) + "\t" + str(mean))
800
+
801
+ def build_balance_tree(self):
802
+ self.Topdown_label() # keep this step for now for debugging purpose
803
+ self.Bottomup_update()
804
+ self.prepare_root()
805
+ self.Topdown_update()
806
+
807
+ # self.list_balance_points()
808
+
809
+ self.balance_tree = self.ddpTree.extract_tree()
810
+
811
+ # bottom up pruning
812
+ for node in self.balance_tree.traverse_postorder():
813
+ node.type = "real"
814
+ node.BPbelow = False
815
+
816
+ '''if node.is_leaf():
817
+ self.logger.info("parent: " + node.label)# + "\t" + str(node.extraction_source.x))
818
+ else:
819
+ self.logger.info("parent: " + node.label)#+ "\t" + str(node.extraction_source.x))'''
820
+
821
+ for ch in node.child_nodes():
822
+ '''try:
823
+ self.logger.info("child: " + ch.label)# + "\t" + str(ch.extraction_source.x))
824
+ except:
825
+ self.logger.info("child: " + ch.label) #+ "\t" + str(ch.extraction_source.x))'''
826
+
827
+ if ch.BPbelow or (ch.extraction_source.x is not None):
828
+ node.BPbelow = True
829
+ # node.BPbelow = node.BPbelow or ch.BPbelow or (ch.extraction_source.x is not None)
830
+
831
+ if not ch.BPbelow:
832
+ # remove the whole clade under ch
833
+ # for ch1 in ch.child_nodes():
834
+ # ch.remove_child(ch1)
835
+ edgelen = ch.edge_length
836
+ node.remove_child(ch)
837
+
838
+ if ch.extraction_source.x is not None:
839
+ # add a new node p at the balance point
840
+ # set p to be a child of node (edge length ch.edge_length - x)
841
+ # add a new node ch1 to be another child of p (edge length ch.mean)
842
+ edgelen = ch.edge_length
843
+
844
+ # p = self.ddpTree.node_factory()
845
+ # ch1 = self.ddpTree.node_factory()
846
+ p = Node()
847
+ ch1 = Node()
848
+
849
+ p.type = "bp" # bp: balance-point
850
+ p.ref_child = ch.extraction_source # link p to the original tree (for later use after finding midpoint)
851
+ ch1.type = "dm" # dm: dummy
852
+
853
+ # node.remove_child(ch)
854
+ node.add_child(p)
855
+ p.add_child(ch1)
856
+
857
+ p.edge_length = edgelen - ch.extraction_source.x
858
+ ch1.edge_length = ch.extraction_source.mean
859
+
860
+ elif ch.extraction_source.x is not None:
861
+ # add a new node p at the balance point
862
+ # set p to be a child of node (edge length ch.edge_length - x)
863
+ # set ch to be a child of p (edge length x)
864
+ # add a new node ch1 to be another child of p (edge length ch.mean)
865
+
866
+ edgelen = ch.edge_length
867
+
868
+ # p = self.ddpTree.node_factory()
869
+ p = Node()
870
+ # ch1 = self.ddpTree.node_factory()
871
+ ch1 = Node()
872
+
873
+ p.type = "bp"
874
+ p.ref_child = ch.extraction_source # link p to the original tree (for later use after finding midpoint)
875
+ ch1.type = "dm"
876
+
877
+ node.remove_child(ch)
878
+ node.add_child(p)
879
+ p.add_child(ch)
880
+ p.add_child(ch1)
881
+
882
+ ch.edge_length = ch.extraction_source.x
883
+ p.edge_length = edgelen - ch.extraction_source.x
884
+ ch1.edge_length = ch.extraction_source.mean
885
+
886
+ # topdown pruning
887
+ node = self.balance_tree.root
888
+ nchild = len(node.child_nodes())
889
+ while nchild > 0 and nchild < 2:
890
+ # node has less than 2 children
891
+ temp = node
892
+ node = node.child_nodes()[0]
893
+ temp.remove_child(node)
894
+ if node.type == "dm":
895
+ node = temp
896
+ break
897
+ nchild = len(node.child_nodes())
898
+
899
+ self.balance_tree.root = node
900
+ self.balance_tree.root.edge_length = None
901
+ # balance_tree.root = None
902
+
903
+ # mptre = MPR_Tree(ddpTree=balance_tree)
904
+ # mptre.tree_as_newick()
905
+
906
+ # return balance_tree
907
+
908
+ def find_root(self):
909
+ self.build_balance_tree()
910
+ mptre = MPR_Tree(ddpTree=self.balance_tree)
911
+ mptre.tree_as_newick()
912
+ mptre.find_root()
913
+
914
+ self.logger.info(mptre.opt_root.type)
915
+
916
+ if mptre.opt_root.type == "bp":
917
+ self.opt_root = mptre.opt_root.ref_child
918
+ self.opt_x = mptre.opt_root.ref_child.x + mptre.opt_x
919
+ elif mptre.opt_root.type == "dm":
920
+ self.logger.info("Hmm... Is it possible that a dummy was found as the opt_root?")
921
+ else:
922
+ self.opt_root = mptre.opt_root.extraction_source
923
+ self.opt_x = mptre.opt_x
924
+
925
+ self.logger.info(self.opt_root.label)
926
+ self.logger.info(self.opt_x)
927
+
928
+
929
+ ########################################################################################################################
930
+
931
# Dedicated logger for the quadratic-programming helper below: it writes
# "LEVEL:name:message" lines to stdout and does not propagate to the root logger.
formatter = logging.Formatter('%(levelname)s:%(name)s:%(message)s')
handler = logging.StreamHandler(stdout)
handler.setFormatter(formatter)

logger = logging.getLogger("quadprog_solvers")
logger.propagate = False
logger.setLevel(logging.INFO)
logger.addHandler(handler)
938
+
939
def cvxopt_solve_qp(P, q, G=None, h=None, A=None, b=None, maxIter=1000):
    """Solve a quadratic program with cvxopt and return the solution vector.

    :param P: square numpy matrix of the quadratic term (symmetrised here).
    :param q: numpy vector of the linear term.
    :param G, h: optional inequality constraints, passed to cvxopt as ``G``/``h``.
    :param A, b: optional equality constraints, passed to cvxopt as ``A``/``b``.
    :param maxIter: value given to cvxopt's ``maxiters`` option.
    :return: 1-D numpy array of length ``P.shape[1]`` built from ``sol['x']``.
        A non-optimal status only triggers a warning; the solver's last
        iterate is still returned.
    """
    P = .5 * (P + P.T)  # make sure P is symmetric

    # Constraints are passed by keyword so that A/b supplied without G/h are
    # not mistaken for inequality constraints by positional matching.
    constraints = {}
    if G is not None:
        constraints['G'] = cvxopt.matrix(G)
        constraints['h'] = cvxopt.matrix(h)
    if A is not None:
        constraints['A'] = cvxopt.matrix(A)
        constraints['b'] = cvxopt.matrix(b)

    sol = cvxopt.solvers.qp(cvxopt.matrix(P), cvxopt.matrix(q),
                            options={'show_progress': False, 'maxiters': maxIter},
                            **constraints)

    if 'optimal' not in sol['status']:
        if "unknown" in sol['status']:
            logger.warning("Couldn't find optimal solution on one branch. Perhaps due to maximum iterations exceeded. Consider increasing the maximum iterations via -x.")
        else:
            logger.warning("Couldn't find optimal solution on one branch. Solution status: " + sol['status'])
    return numpy.array(sol['x']).reshape((P.shape[1],))
954
+
955
+
956
+ ########################################################################################################################
957
+
958
# Tolerance used both in the QP constraint vector and when comparing RTT scores.
EPSILON = 1e-5


class RTT_Tree(Tree_extend):
    """Supportive base class to implement RTT-reroot (root-to-tip), hence the name.

    ``smplTimes`` maps leaf labels to sampling times; it is indexed by
    ``node.label`` for every leaf.
    """

    def __init__(self, smplTimes, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=stderr,
                 maxIter=1000):
        super(RTT_Tree, self).__init__(ddpTree, tree_file, schema)
        self.logger = new_logger("RTT_Tree_" + str(logger_id), myStream=logger_stream)
        self.smplTimes = smplTimes
        self.reset()
        self.maxIter = maxIter

    def reset(self):
        """Forget any previous optimum and recompute the smallest sampling time."""
        self.RTT = None
        self.opt_root = self.ddpTree.root
        self.opt_y = self.opt_x = self.opt_mu = 0
        self.tmin = min(self.smplTimes.values())

    def Node_init(self, node, nleaf=1, SDI=0, SD=0, ST=0, SDT=0, SSD=0):
        """Store the given bookkeeping values as attributes of ``node``."""
        initial_values = (
            ("SDI", SDI),
            ("SD", SD),
            ("nleaf", nleaf),
            ("ST", ST),
            ("SDT", SDT),
            ("SSD", SSD),
        )
        for attr_name, attr_value in initial_values:
            setattr(node, attr_name, attr_value)

    def Opt_function(self, node, SST, deltaT, deltaD, SDT, SSD, ST, SD):
        """Solve the per-branch QP for ``node`` and keep it if it improves the RTT score.

        The three unknowns are read back from the solver as (x, y, mu); the
        current best is replaced only when the new score is lower by more
        than ``EPSILON``.
        """
        n = self.total_leaves

        # Coefficients of the quadratic objective in (x, y, mu).
        a = n
        b = SST
        c = -2 * deltaT
        d = 2 * deltaD
        e = -2 * SDT
        f = SSD
        k = 2 * (n - 2 * node.nleaf)
        m = -2 * ST
        r = 2 * SD

        tmin = self.tmin

        # use quadprog to compute mu_star, y_star, and x_star
        quad_term = array([[a, k / 2, c / 2.],
                           [k / 2, n, m / 2],
                           [c / 2, m / 2, b]])
        lin_term = array([d / 2., r / 2, e / 2])
        ineq_lhs = array([[-1., 0., 0.],
                          [0., 0., -1.],
                          [1., 0., 0.],
                          [0., 1., -tmin]])
        ineq_rhs = array([0., EPSILON, node.edge_length, 0]).reshape((4,))
        solution = cvxopt_solve_qp(quad_term, lin_term, ineq_lhs, ineq_rhs, maxIter=self.maxIter)

        x_star = solution[0]
        y_star = solution[1]
        mu_star = solution[2]
        curr_RTT = a * x_star * x_star + b * mu_star * mu_star + c * x_star * mu_star + d * x_star + e * mu_star + f + n * y_star * y_star + k * x_star * y_star + m * mu_star * y_star + r * y_star

        # Keep the previous optimum unless this branch is clearly better.
        if self.RTT is not None and not (curr_RTT - self.RTT < -EPSILON):
            return

        self.RTT = curr_RTT
        self.opt_root = node
        self.opt_x = node.edge_length - x_star
        self.opt_y = y_star
        self.opt_mu = mu_star

    def bUp_update(self, node):
        """Compute ``nleaf``, ``SDI`` and ``ST`` of ``node`` from its children (or its label)."""
        if node.is_leaf():
            node.nleaf = 1
            node.SDI = 0
            node.ST = self.smplTimes[node.label]
            return

        node.nleaf = 0
        node.SDI = 0
        node.ST = 0
        for kid in node.child_nodes():
            node.nleaf += kid.nleaf
            node.SDI += kid.SDI + kid.nleaf * kid.edge_length
            node.ST += kid.ST

    def Update_var(self, child, node, edge_length):
        """Return the tuple ``(SST, deltaT, deltaD, SDT, SSD, ST, SD)`` for the edge above ``child``."""
        return (
            self.SST,
            self.ddpTree.root.ST - 2 * child.ST,
            -2 * child.nleaf * edge_length - 2 * child.SDI + node.SD,
            node.SDT,
            node.SSD,
            self.ST,
            node.SD,
        )

    def tDown_update(self, node, opt_function):
        """Derive ``SD``, ``SDT`` and ``SSD`` for each child of ``node``, then evaluate it."""
        for kid in node.child_nodes():
            kid.SD = node.SD + (self.total_leaves - 2 * kid.nleaf) * kid.edge_length
            kid.SDT = node.SDT + kid.edge_length * (self.ddpTree.root.ST - 2 * kid.ST)
            kid.SSD = node.SSD + (self.total_leaves - 4 * kid.nleaf) * (kid.edge_length ** 2) + 2 * (
                    node.SD - 2 * kid.SDI) * kid.edge_length
            edge_stats = self.Update_var(kid, node, kid.edge_length)
            opt_function(kid, *edge_stats)

    def prepare_root(self):
        """Seed the root's sums by walking the tree once in preorder."""
        root = self.get_root()
        root.SD = root.SDI
        self.total_leaves = root.nleaf
        self.ST = root.ST
        self.ddpTree.root.droot = 0
        self.ddpTree.root.troot = 0
        root.SD, root.SSD, root.SDT, self.SST = 0, 0, 0, 0
        for v in self.ddpTree.traverse_preorder():
            if v.is_root():
                continue
            # must have defined edge lengths
            v.droot = v.parent.droot + v.edge_length
            if not v.is_leaf():
                continue
            root.SSD += (v.droot ** 2)
            self.SST += (self.smplTimes[v.label] ** 2)
            root.SD += v.droot
            root.SDT += (v.droot * self.smplTimes[v.label])

    def opt_score(self):
        """Return the best RTT score found so far (``None`` before any evaluation)."""
        return self.RTT

    def return_values(self):
        """Return the tuple (RTT score per leaf, mu, t0) with ``t0 = opt_y / opt_mu``."""
        per_leaf_score = self.opt_score() / self.total_leaves
        t0 = self.opt_y / self.opt_mu
        return per_leaf_score, self.opt_mu, t0

    def report_score(self):
        """Return a tab-separated ``RTT=...  mu=...  t0=...`` summary string."""
        pieces = [
            "RTT=", str(self.opt_score() / self.total_leaves),
            "\tmu=", str(self.opt_mu),
            "\tt0=", str(self.opt_y / self.opt_mu),
        ]
        return "".join(pieces)
1072
+
1073
+
1074
+ ########################################################################################################################
1075
+
1076
+
1077
def main():
    """Command-line entry point: parse the options, then reroot every input tree.

    One tree is read per input line. Each is rerooted with the selected
    method (MP, MV, OG or RTT), its score is logged and the rerooted tree is
    written to the output file. Supplying outgroups forces OG and supplying
    sampling times forces RTT. Invalid option combinations are logged as
    errors and terminate the program with exit status 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', required=False, type=argparse.FileType('r'), default=stdin, help="Input File (default is STDIN)")
    parser.add_argument('-m', '--method', required=False, type=str, default="MV", help="Method (MP for midpoint, MV for minVAR, OG for outgroup, RTT for root-to-tip) (default is MV)")
    parser.add_argument('-g', '--outgroups', required=False, type=str, help="Specify the outgroups. If specifying a list of outgroups, put them between quotes (i.e. \"). Otherwise, specifying a file which containts all the outgroups. Can only be used with -m OG")
    parser.add_argument('-t', '--smplTimes', required=False, type=argparse.FileType('r'), help="The file containing the sampling times at leaves; to be used with -m RTT")
    parser.add_argument('-o', '--outfile', required=False, type=argparse.FileType('w'), default=stdout, help="Output File (default is STDOUT)")
    parser.add_argument('-s', '--schema', required=False, type=str, default="newick", help="Schema of your input treefile (default is newick)")
    parser.add_argument('-f', '--infofile', required=False, type=argparse.FileType('w'), default=None, help="Save all the logging to this file. Default: print to stderr")
    parser.add_argument("-v", "--version", action='version', version=PROGRAM_NAME + " " + PROGRAM_VERSION, help="Show FastRoot version and exit")
    parser.add_argument("-x", "--maxIter", required=False, type=int, default=None, help="Maximum number of iterations to run cvxopt")

    # print help message if no argument is given
    if len(argv) == 1:
        logger = new_logger(__name__)
        logger.info("Running " + PROGRAM_NAME + " version " + PROGRAM_VERSION)
        parser.print_help()
        exit(0)

    args = parser.parse_args()
    stream = args.infofile if args.infofile else stderr
    logger = new_logger(__name__, myStream=stream)
    logger.info("Running " + PROGRAM_NAME + " version " + PROGRAM_VERSION)
    new_argv = re.sub(' +', ' ', " ".join(argv).replace("\n", " "))
    logger.info(PROGRAM_NAME + " was called as follows: " + new_argv)

    METHOD2FUNC = {'MP': MPR_Tree, 'MV': MV00_Tree, 'OG': OGR_Tree, 'RTT': RTT_Tree}
    METHOD2DESC = {'MP': "Midpoint", 'MV': "MinVar", 'OG': "Outgroup", 'RTT': "Root-to-Tip"}
    method = args.method.upper()

    # reading outgroups
    if args.outgroups:
        if method != 'OG':
            method = 'OG'
            logger.warning("The rooting method is set to outgroup rooting (OG) due to the presence of outgroups")
        if os.path.exists(args.outgroups):
            # -g names an existing file: one outgroup per line.
            # The context manager closes the file once it has been read.
            with open(args.outgroups, 'r') as og_file:
                OGs = [line.strip() for line in og_file]
        else:
            # Otherwise -g is a whitespace-separated list of outgroups.
            OGs = args.outgroups.split()
    else:
        OGs = None

    # reading sampling times
    if args.smplTimes:
        smplTimes = {}
        for line in args.smplTimes:
            fields = line.strip().split()
            if not fields:
                # tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            sp, t = fields
            smplTimes[sp] = float(t)
        if method != 'RTT':
            method = 'RTT'
            logger.warning(
                "The rooting method is set to root-to-tip rooting (RTT) due to the presence of sampling times")

    # Error exits use a non-zero status so calling scripts can detect the failure.
    if method == 'RTT' and args.smplTimes is None:
        logger.error("Need sampling times for root-to-tip rooting")
        exit(1)
    elif method == 'OG' and args.outgroups is None:
        logger.error("Need outgroups for outgroup rooting")
        exit(1)

    # Explicit check instead of `assert`, which is removed when Python runs with -O.
    if method not in METHOD2FUNC:
        logger.error("Invalid method! Valid options: MP for midpoint, MV for minVAR, OG for outgroups, RTT for root-to-tip")
        exit(1)
    logger.info("Rooting Method: " + METHOD2DESC[method] + " Rooting")

    maxIter = 0
    if method == 'RTT':
        if args.maxIter and args.maxIter < 1000:
            logger.warning(
                "Invalid number of maximum iterations (-x). Must be at least 1000. Set back to 1000 by default.")
        maxIter = max(1000, args.maxIter) if args.maxIter else 1000
        logger.info("Maximum iterations: " + str(maxIter))
    elif args.maxIter is not None:
        logger.warning("The maximum number of iterations (-x) is only used with root-to-tip rooting (RTT)")

    # read and root each tree
    for i, line in enumerate(args.input):
        tree = read_tree(line, schema=args.schema.lower())
        if method == 'OG':
            a_tree = OGR_Tree(OGs, ddpTree=tree, logger_id=i + 1, logger_stream=stream)
        elif method == 'RTT':
            a_tree = RTT_Tree(smplTimes, ddpTree=tree, logger_id=i + 1, logger_stream=stream, maxIter=maxIter)
        else:
            a_tree = METHOD2FUNC[method](ddpTree=tree, logger_id=i + 1, logger_stream=stream)

        a_tree.Reroot()
        logger.info("Tree " + str(i + 1) + " " + a_tree.report_score())
        a_tree.tree_as_newick(outstream=args.outfile)


if __name__ == "__main__":
    main()
1170
+
1171
+
1172
+ '''
1173
+
1174
+
1175
+ '''