treesak 1.53.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +113 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/C60SR4.nex +127 -0
  19. TreeSAK/CompareMCMC.py +138 -0
  20. TreeSAK/ConcateMSA.py +111 -0
  21. TreeSAK/ConvertMSA.py +135 -0
  22. TreeSAK/Dir.rb +82 -0
  23. TreeSAK/ExtractMarkerSeq.py +263 -0
  24. TreeSAK/FastRoot.py +1175 -0
  25. TreeSAK/FastRoot_backup.py +1122 -0
  26. TreeSAK/FigTree.py +34 -0
  27. TreeSAK/GTDB_tree.py +76 -0
  28. TreeSAK/GeneTree.py +142 -0
  29. TreeSAK/KEGG_Luo17.py +807 -0
  30. TreeSAK/LcaToLeaves.py +66 -0
  31. TreeSAK/MarkerRef2Tree.py +616 -0
  32. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  33. TreeSAK/MarkerSeq2Tree.py +299 -0
  34. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  35. TreeSAK/ModifyTopo.py +116 -0
  36. TreeSAK/Newick_tree_plotter.py +79 -0
  37. TreeSAK/OMA.py +170 -0
  38. TreeSAK/OMA2.py +212 -0
  39. TreeSAK/OneLineAln.py +50 -0
  40. TreeSAK/PB.py +155 -0
  41. TreeSAK/PMSF.py +115 -0
  42. TreeSAK/PhyloBiAssoc.R +84 -0
  43. TreeSAK/PhyloBiAssoc.py +167 -0
  44. TreeSAK/PlotMCMC.py +41 -0
  45. TreeSAK/PlotMcmcNode.py +152 -0
  46. TreeSAK/PlotMcmcNode_old.py +252 -0
  47. TreeSAK/RootTree.py +101 -0
  48. TreeSAK/RootTreeGTDB.py +371 -0
  49. TreeSAK/RootTreeGTDB214.py +288 -0
  50. TreeSAK/RootTreeGTDB220.py +300 -0
  51. TreeSAK/SequentialDating.py +16 -0
  52. TreeSAK/SingleAleHGT.py +157 -0
  53. TreeSAK/SingleLinePhy.py +50 -0
  54. TreeSAK/SliceMSA.py +142 -0
  55. TreeSAK/SplitScore.py +21 -0
  56. TreeSAK/SplitScore1.py +177 -0
  57. TreeSAK/SplitScore1OMA.py +148 -0
  58. TreeSAK/SplitScore2.py +608 -0
  59. TreeSAK/TaxaCountStats.R +256 -0
  60. TreeSAK/TaxonTree.py +47 -0
  61. TreeSAK/TreeSAK_config.py +32 -0
  62. TreeSAK/VERSION +164 -0
  63. TreeSAK/VisHPD95.R +45 -0
  64. TreeSAK/VisHPD95.py +200 -0
  65. TreeSAK/__init__.py +0 -0
  66. TreeSAK/ale_parser.py +74 -0
  67. TreeSAK/ale_splitter.py +63 -0
  68. TreeSAK/alignment_pruner.pl +1471 -0
  69. TreeSAK/assessOG.py +45 -0
  70. TreeSAK/batch_itol.py +171 -0
  71. TreeSAK/catfasta2phy.py +140 -0
  72. TreeSAK/cogTree.py +185 -0
  73. TreeSAK/compare_trees.R +30 -0
  74. TreeSAK/compare_trees.py +255 -0
  75. TreeSAK/dating.py +264 -0
  76. TreeSAK/dating_ss.py +361 -0
  77. TreeSAK/deltall.py +82 -0
  78. TreeSAK/do_rrtc.rb +464 -0
  79. TreeSAK/fa2phy.py +42 -0
  80. TreeSAK/filter_rename_ar53.py +118 -0
  81. TreeSAK/format_leaf_name.py +70 -0
  82. TreeSAK/gap_stats.py +38 -0
  83. TreeSAK/get_SCG_tree.py +742 -0
  84. TreeSAK/get_arCOG_seq.py +97 -0
  85. TreeSAK/global_functions.py +222 -0
  86. TreeSAK/gnm_leaves.py +43 -0
  87. TreeSAK/iTOL.py +791 -0
  88. TreeSAK/iTOL_gene_tree.py +80 -0
  89. TreeSAK/itol_msa_stats.py +56 -0
  90. TreeSAK/keep_highest_rrtc.py +37 -0
  91. TreeSAK/koTree.py +194 -0
  92. TreeSAK/label_gene_tree_by_gnm.py +34 -0
  93. TreeSAK/label_tree.R +75 -0
  94. TreeSAK/label_tree.py +121 -0
  95. TreeSAK/mad.py +708 -0
  96. TreeSAK/mcmc2tree.py +58 -0
  97. TreeSAK/mcmcTC copy.py +92 -0
  98. TreeSAK/mcmcTC.py +104 -0
  99. TreeSAK/mcmctree_vs_reltime.R +44 -0
  100. TreeSAK/mcmctree_vs_reltime.py +252 -0
  101. TreeSAK/merge_pdf.py +32 -0
  102. TreeSAK/pRTC.py +56 -0
  103. TreeSAK/parse_mcmctree.py +198 -0
  104. TreeSAK/parse_reltime.py +141 -0
  105. TreeSAK/phy2fa.py +37 -0
  106. TreeSAK/plot_distruibution_th.py +165 -0
  107. TreeSAK/prep_mcmctree_ctl.py +92 -0
  108. TreeSAK/print_leaves.py +32 -0
  109. TreeSAK/pruneMSA.py +63 -0
  110. TreeSAK/recode.py +73 -0
  111. TreeSAK/remove_bias.R +112 -0
  112. TreeSAK/rename_leaves.py +78 -0
  113. TreeSAK/replace_clade.py +55 -0
  114. TreeSAK/root_with_out_group.py +84 -0
  115. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  116. TreeSAK/subsample_drep_gnms.py +74 -0
  117. TreeSAK/subset.py +69 -0
  118. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  119. TreeSAK/supertree.py +330 -0
  120. TreeSAK/tmp_1.py +19 -0
  121. TreeSAK/tmp_2.py +19 -0
  122. TreeSAK/tmp_3.py +120 -0
  123. TreeSAK/tmp_4.py +43 -0
  124. TreeSAK/tmp_5.py +12 -0
  125. TreeSAK/weighted_rand.rb +23 -0
  126. treesak-1.53.3.data/scripts/TreeSAK +955 -0
  127. treesak-1.53.3.dist-info/LICENSE +674 -0
  128. treesak-1.53.3.dist-info/METADATA +27 -0
  129. treesak-1.53.3.dist-info/RECORD +131 -0
  130. treesak-1.53.3.dist-info/WHEEL +5 -0
  131. treesak-1.53.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1122 @@
1
+ from treeswift import *
2
+ from sys import stdin, stdout, argv, exit, stderr
3
+ import argparse
4
+ from os import path
5
+ import re
6
+ import io
7
+ import argparse
8
+ import arviz as az
9
+ import pandas as pd
10
+ import plotly.graph_objects as go
11
+ import logging
12
+ from sys import stdout
13
+ import numpy
14
+ import cvxopt
15
+ from numpy import *
16
+ import logging
17
+ from sys import stdout
18
+
19
+
20
# Help/usage text for the FastRoot sub-command.
# NOTE(review): the example passes two MCMC files (-m1/-m2) and writes a
# convergence plot, which does not look like a rooting invocation -- confirm
# against the sub-command's argument parser before relying on it.
FastRoot_usage = """
================================= FastRoot example commands =================================

TreeSAK FastRoot -m1 r1_mcmc.txt -m2 r1_mcmc.txt -o convergence_plot.png

# This script was modified based on the script FastRoot.py from Uyen Mai:
https://github.com/uym2/MinVar-Rooting/blob/master/FastRoot.py

==============================================================================================
"""
30
+
31
+
32
def new_logger(myName, myLevel=logging.INFO, myStream=stdout):
    """Return the logger called ``myName``, writing to ``myStream``.

    Args:
        myName: name handed to ``logging.getLogger``.
        myLevel: level set on the logger (default ``logging.INFO``).
        myStream: stream the attached ``StreamHandler`` writes to.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(myName)
    logger.setLevel(myLevel)

    # logging.getLogger() hands back the same object for the same name, so only
    # attach a handler for this stream once; otherwise every repeated call
    # would duplicate each emitted message.
    already_attached = any(
        isinstance(h, logging.StreamHandler) and getattr(h, "stream", None) is myStream
        for h in logger.handlers
    )
    if not already_attached:
        logger.addHandler(logging.StreamHandler(myStream))

    # The callers store the result as ``self.logger``; it must not be None.
    return logger
36
+
37
+
38
class Tree_extend(object):
    """Shared machinery for the rooting strategies built on top of it.

    The instance wraps a tree object in ``self.ddpTree`` and drives a two-pass
    search: ``Bottomup_update`` (post-order) followed by ``Topdown_update``
    (pre-order).  ``Reroot`` then moves the root onto the edge that was chosen.

    This base class calls, but does not itself define, ``Node_init``,
    ``bUp_update``, ``tDown_update`` and ``prepare_root``; it also reads
    ``self.logger``, ``self.opt_root`` and ``self.opt_x`` without assigning
    them.  Subclasses have to provide all of these.
    """

    def __init__(self, ddpTree=None, tree_file=None, schema="newick"):
        """Wrap ``ddpTree``, or read the tree from ``tree_file`` when given."""
        if tree_file:
            self.ddpTree = read_tree(tree_file, schema)
        else:
            self.ddpTree = ddpTree

    def Bottomup_label(self):
        """Give every node a ``name`` ('L<i>' for leaves, 'I<i>' otherwise) in post-order."""
        i = 0
        for node in self.ddpTree.traverse_postorder():
            if node.is_leaf():
                node.name = 'L' + str(i)
            else:
                node.name = 'I' + str(i)
            i += 1

    def Topdown_label(self, label_type="all"):
        """Give every node a ``name`` in pre-order.

        ``label_type`` selects which nodes get a generated name: "all",
        "leaves" or "internal".  Nodes that are not selected get their
        existing ``label`` copied into ``name``.
        """
        i = 0
        for node in self.ddpTree.traverse_preorder():
            if node.is_leaf():
                if label_type == "all" or label_type == "leaves":
                    node.name = 'L' + str(i)
                else:
                    node.name = node.label
            else:
                if label_type == "all" or label_type == "internal":
                    node.name = 'I' + str(i)
                else:
                    node.name = node.label
            i += 1

    def Bottomup_update(self):
        """Post-order pass: initialise each node, then run the subclass ``bUp_update``."""
        for node in self.ddpTree.traverse_postorder():
            self.Node_init(node)
            self.bUp_update(node)

    def Topdown_update(self):
        """Pre-order pass: run the subclass ``tDown_update`` with ``Opt_function``."""
        for node in self.ddpTree.traverse_preorder():
            self.tDown_update(node, self.Opt_function)

    def compute_distances(self):
        """Return ``{leaf.name: distance from the root}`` for every leaf."""
        D = {}

        def __compute_dRoot__(node, cumm_l):
            if node.is_leaf():
                D[node.name] = cumm_l
            else:
                for child in node.child_nodes():
                    __compute_dRoot__(child, cumm_l + child.edge_length)

        __compute_dRoot__(self.ddpTree.root, 0)
        return D

    def compute_ingroup_distances(self):
        """Return the leaf distances measured from the root's second child."""
        D = []

        def __compute_dLeaf__(node, cumm_l):
            if node.is_leaf():
                D.append(cumm_l)
            else:
                for child in node.child_nodes():
                    __compute_dLeaf__(child, cumm_l + child.edge_length)

        children = self.ddpTree.root.child_nodes()
        crowded_child = None
        maxleaf = -1

        for node in children:
            if node.nleaf > maxleaf:
                maxleaf = node.nleaf
                crowded_child = node

        # NOTE(review): ``crowded_child`` (the child with most leaves) is
        # computed but the traversal starts at ``children[1]``.  This looks
        # unintended; behaviour is left untouched until the intent is confirmed.
        __compute_dLeaf__(children[1], 0)

        return D

    def filter_branch(self, threshold=None):
        """Reroot, then repeatedly drop over-long branches and reroot again.

        The loop stops as soon as ``filter_by_threshold`` reports that nothing
        was removed.
        """
        i = 1
        self.logger.info("Iteration: " + str(i))
        self.Reroot()
        while 1:
            check = self.filter_by_threshold(threshold=threshold)
            if (not check):
                self.logger.info("I could not remove anything more! I stop here!")
                break
            i += 1
            self.logger.info("Iteration: " + str(i))
            self.reset()
            self.Reroot()

    def filter_by_threshold(self, threshold=None, k=3.5):
        """Remove nodes whose distance from the root exceeds ``threshold``.

        When ``threshold`` is None it is obtained from
        ``self.compute_threshold(k=k)``.  Internal nodes left without children
        are removed as well, and internal nodes left with a single child are
        collapsed (the two edge lengths are added).

        Returns:
            True if at least one node was removed, else False.
        """
        if threshold is None:
            threshold = self.compute_threshold(k=k)

        def __filter__(node, cumm_l):
            removed = False
            node.child_removed = False
            for child in node.child_nodes():
                check = __filter__(child, cumm_l + child.edge_length)
                removed = removed or check

            # The rest of this class navigates upwards with ``.parent``
            # (see reroot_at_edge); use the same attribute here.
            p = node.parent
            if p is None:
                # The root has no parent to detach it from or to re-attach its
                # only child to, so it is never removed or collapsed.
                return removed

            if (cumm_l > threshold) or (node.child_removed and node.num_children() == 0):
                # remove node
                p.remove_child(node)
                # update parent node
                p.child_removed = True
                removed = True
                try:
                    self.logger.info(node.label + " removed")
                except TypeError:
                    # label is not a string (e.g. None): fall back to the generated name
                    self.logger.info(node.name + " removed")
            elif node.num_children() == 1:
                # remove node and attach its only child to its parent
                e1 = node.edge_length
                child = node.child_nodes()[0]
                e2 = child.edge_length
                p.remove_child(node)
                node.remove_child(child)
                p.add_child(child)
                child.edge_length = e1 + e2
            return removed

        return __filter__(self.get_root(), 0)

    def compute_threshold(self, k=3.5):
        """Placeholder; subclasses that support filtering override this."""
        self.logger.warning("Abstract class! Should never be called")
        return 0

    def compute_threhold(self, k=3.5):
        """Misspelt historical name, kept so existing callers keep working."""
        return self.compute_threshold(k=k)

    def reset(self):
        """Placeholder; subclasses reset their search state here."""
        self.logger.warning("Abstract class! Should never be called")

    def find_root(self):
        """Label the nodes and run both passes of the search."""
        self.Topdown_label()  # temporarily included for debugging
        self.Bottomup_update()
        self.prepare_root()
        self.Topdown_update()

    def opt_score(self):
        """Placeholder; subclasses return their optimal score."""
        self.logger.warning("Abstract class! Should never be called")

    def report_score(self):
        """Placeholder; subclasses return a printable score."""
        self.logger.warning("Abstract class! Should never be called")

    def Reroot(self):
        """Search for the best root and, if it differs from the current one, move the root there."""
        self.find_root()
        if self.opt_root != self.ddpTree.root:
            self.reroot_at_edge(self.opt_root, self.opt_x)

    def Opt_function(self, node):
        """Placeholder; subclasses score ``node`` as a candidate root edge."""
        self.logger.warning("Abstract method! Should never be called")

    def tree_as_newick(self, outstream=stdout, label_by_name=False):
        """Write the tree to ``outstream`` in Newick format, terminated by ';' and a newline.

        With ``label_by_name`` the generated ``name`` of each node is written
        instead of its ``label``.
        """
        # The default uses the ``stdout`` name imported from ``sys``; the
        # ``sys`` module itself is not bound by the visible imports.
        self.__write_newick(self.ddpTree.root, outstream, label_by_name=label_by_name)
        outstream.write(";\n")

    def __write_newick(self, node, outstream, label_by_name=False):
        """Recursively write the subtree below ``node`` (no trailing ';')."""
        if node.is_leaf():
            if label_by_name:
                outstream.write(str(node.name))
            elif node.label is not None:
                # str() so that non-string labels (e.g. numbers) can be written
                outstream.write(str(node.label))
        else:
            outstream.write('(')
            is_first_child = True
            for child in node.child_nodes():
                if is_first_child:
                    is_first_child = False
                else:
                    outstream.write(',')
                self.__write_newick(child, outstream, label_by_name=label_by_name)
            outstream.write(')')
            if label_by_name:
                outstream.write(str(node.name))
            elif node.label is not None:
                outstream.write(str(node.label))

        if node.edge_length is not None:
            outstream.write(":" + str(node.edge_length))

    def reroot_at_edge(self, node, length):
        """Place a new root on the edge above ``node``.

        The new root sits ``length`` away from ``node`` and
        ``node.edge_length - length`` away from ``node``'s former parent.

        Returns:
            ``None`` when ``node`` is falsy or has no parent, ``(0, 0)`` when
            ``length`` is 0 and ``node`` is a leaf, otherwise
            ``(d2currRoot, br2currRoot)``: the accumulated length and the
            number of edges walked back towards the old root.
        """
        if not node:
            return
        head = node  # opt_root = v = node
        tail = node.parent  # u parent of opt_root
        if not tail:
            return

        # Computed only after the guards so that a missing node or a
        # parent-less node never triggers arithmetic on its attributes.
        length1 = node.edge_length - length
        length2 = length

        if (length2 == 0) and head.is_leaf():
            return 0, 0

        new_root = Node()

        tail.remove_child(head)

        new_root.add_child(head)
        head.edge_length = length2

        p = tail.parent
        l = tail.edge_length

        new_root.add_child(tail)
        tail.edge_length = length1

        br2currRoot = 0
        d2currRoot = length1

        if (tail is self.ddpTree.root):
            head = new_root

        # Walk up from the chosen edge to the old root, reversing each
        # parent/child link and shifting the edge lengths along.
        while tail is not self.ddpTree.root:
            q = head.parent
            head = tail
            tail = p
            p = tail.parent

            br2currRoot += 1
            d2currRoot += l

            l1 = tail.edge_length
            tail.remove_child(head)
            # restore the parent link that remove_child has just touched
            head.parent = q

            head.add_child(tail)
            tail.edge_length = l
            l = l1

        # out of while loop: tail IS now tree.root
        if tail.num_children() == 1:
            # merge the 2 branches of the old root and adjust the branch length
            sis = tail.child_nodes()[0]
            l = sis.edge_length
            tail.remove_child(sis)
            head.add_child(sis)
            sis.edge_length = l + tail.edge_length
            head.remove_child(tail)

        new_root.name = self.ddpTree.root.name
        self.ddpTree.root.name = "OLD"
        self.ddpTree.root = new_root

        return d2currRoot, br2currRoot

    def get_root(self):
        """Return the current root node of the wrapped tree."""
        return self.ddpTree.root
336
+
337
+
338
def cvxopt_solve_qp(P, q, G=None, h=None, A=None, b=None, maxIter=1000):
    """Solve a quadratic program with ``cvxopt.solvers.qp``.

    Args:
        P, q: quadratic and linear terms of the objective (array-like).
        G, h: optional inequality-constraint pair handed to the solver.
        A, b: optional equality-constraint pair handed to the solver.
        maxIter: value passed as the solver's ``maxiters`` option.

    Returns:
        The solver's ``x`` as a 1-D numpy array of length ``P.shape[1]``.
        A warning is logged when the reported status is not optimal, but the
        solver's ``x`` is still returned.
    """
    P = .5 * (P + P.T)  # make sure P is symmetric

    # Pass the constraint pairs by keyword: with positional packing, A and b
    # supplied without G and h would end up in the inequality slots.
    constraints = {}
    if G is not None:
        constraints['G'] = cvxopt.matrix(G)
        constraints['h'] = cvxopt.matrix(h)
    if A is not None:
        constraints['A'] = cvxopt.matrix(A)
        constraints['b'] = cvxopt.matrix(b)

    sol = cvxopt.solvers.qp(cvxopt.matrix(P), cvxopt.matrix(q),
                            options={'show_progress': False, 'maxiters': maxIter},
                            **constraints)

    if 'optimal' not in sol['status']:
        # No module-level ``logger`` is visible in this part of the file, so
        # fetch one explicitly instead of relying on an undefined global.
        log = logging.getLogger(__name__)
        if "unknown" in sol['status']:
            log.warning("Couldn't find optimal solution on one branch. Perhaps due to maximum iterations exceeded. Consider increasing the maximum iterations via -x.")
        else:
            log.warning("Couldn't find optimal solution on one branch. Solution status: " + sol['status'])
    return numpy.array(sol['x']).reshape((P.shape[1],))
353
+
354
# Tolerance used by RTT_Tree: the minimum improvement required to accept a
# new optimum, and the right-hand side of one of the QP constraints.
EPSILON = 1e-5


class RTT_Tree(Tree_extend):
    """Base helper for RTT rerooting (hence the name).

    Every leaf label must be a key of ``smplTimes``.  For each edge a small
    quadratic program in three unknowns (x, y, mu) is solved and the edge
    with the lowest objective value is remembered.
    """

    def __init__(self, smplTimes, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=stderr,
                 maxIter=1000):
        super(RTT_Tree, self).__init__(ddpTree, tree_file, schema)
        self.logger = new_logger("RTT_Tree_%s" % str(logger_id), myStream=logger_stream)
        self.smplTimes = smplTimes
        self.reset()
        self.maxIter = maxIter

    def reset(self):
        """Forget the best edge found so far and recompute the earliest sampling time."""
        self.RTT = None
        self.opt_root = self.ddpTree.root
        self.opt_y = 0
        self.opt_x = 0
        self.opt_mu = 0
        self.tmin = min(self.smplTimes.values())

    def Node_init(self, node, nleaf=1, SDI=0, SD=0, ST=0, SDT=0, SSD=0):
        """Attach the per-node accumulators used by both passes."""
        node.SDI = SDI
        node.SD = SD
        node.nleaf = nleaf
        node.ST = ST
        node.SDT = SDT
        node.SSD = SSD

    def Opt_function(self, node, SST, deltaT, deltaD, SDT, SSD, ST, SD):
        """Solve the QP for the edge above ``node`` and keep it if it beats the current best."""
        n = self.total_leaves

        # Coefficients of the objective, named after the terms they multiply.
        c_xx = n
        c_mm = SST
        c_xm = -2 * deltaT
        c_x = 2 * deltaD
        c_m = -2 * SDT
        c_0 = SSD
        c_xy = 2 * (n - 2 * node.nleaf)
        c_my = -2 * ST
        c_y = 2 * SD

        tmin = self.tmin

        # use quadprog to compute mu_star, y_star, and x_star
        P = array([[c_xx, c_xy / 2, c_xm / 2.],
                   [c_xy / 2, n, c_my / 2],
                   [c_xm / 2, c_my / 2, c_mm]])
        q = array([c_x / 2., c_y / 2, c_m / 2])
        G = array([[-1., 0., 0.],
                   [0., 0., -1.],
                   [1., 0., 0.],
                   [0., 1., -tmin]])
        h = array([0., EPSILON, node.edge_length, 0]).reshape((4,))

        solution = cvxopt_solve_qp(P, q, G, h, maxIter=self.maxIter)
        x_star, y_star, mu_star = solution[0], solution[1], solution[2]

        curr_RTT = (c_xx * x_star * x_star + c_mm * mu_star * mu_star + c_xm * x_star * mu_star
                    + c_x * x_star + c_m * mu_star + c_0 + n * y_star * y_star
                    + c_xy * x_star * y_star + c_my * mu_star * y_star + c_y * y_star)

        if self.RTT is not None and not (curr_RTT - self.RTT < -EPSILON):
            return

        self.RTT = curr_RTT
        self.opt_root = node
        self.opt_x = node.edge_length - x_star
        self.opt_y = y_star
        self.opt_mu = mu_star

    def bUp_update(self, node):
        """Post-order step: fill ``nleaf``, ``SDI`` and ``ST`` from the children."""
        if node.is_leaf():
            node.nleaf = 1
            node.SDI = 0
            node.ST = self.smplTimes[node.label]
            return

        node.nleaf = 0
        node.SDI = 0
        node.ST = 0
        for kid in node.child_nodes():
            node.nleaf += kid.nleaf
            node.SDI += kid.SDI + kid.nleaf * kid.edge_length
            node.ST += kid.ST

    def Update_var(self, child, node, edge_length):
        """Collect the quantities ``Opt_function`` needs for the edge ``node`` -> ``child``."""
        root_ST = self.ddpTree.root.ST
        deltaT = root_ST - 2 * child.ST
        deltaD = -2 * child.nleaf * edge_length - 2 * child.SDI + node.SD
        return self.SST, deltaT, deltaD, node.SDT, node.SSD, self.ST, node.SD

    def tDown_update(self, node, opt_function):
        """Pre-order step: push ``SD``/``SDT``/``SSD`` down to each child and score its edge."""
        for kid in node.child_nodes():
            kid.SD = node.SD + (self.total_leaves - 2 * kid.nleaf) * kid.edge_length
            kid.SDT = node.SDT + kid.edge_length * (self.ddpTree.root.ST - 2 * kid.ST)
            kid.SSD = node.SSD + (self.total_leaves - 4 * kid.nleaf) * (kid.edge_length ** 2) + 2 * (
                    node.SD - 2 * kid.SDI) * kid.edge_length
            values = self.Update_var(kid, node, kid.edge_length)
            opt_function(kid, *values)

    def prepare_root(self):
        """Compute the totals at the current root before the top-down pass."""
        root = self.get_root()
        root.SD = root.SDI
        self.total_leaves = root.nleaf
        self.ST = root.ST
        self.ddpTree.root.droot = 0
        self.ddpTree.root.troot = 0
        root.SD = 0
        root.SSD = 0
        root.SDT = 0
        self.SST = 0
        for v in self.ddpTree.traverse_preorder():
            if v.is_root():
                continue
            # must have defined edge lengths
            v.droot = v.parent.droot + v.edge_length
            if not v.is_leaf():
                continue
            root.SSD += (v.droot ** 2)
            self.SST += (self.smplTimes[v.label] ** 2)
            root.SD += v.droot
            root.SDT += (v.droot * self.smplTimes[v.label])

    def opt_score(self):
        """Return the best objective value found so far."""
        return self.RTT

    def return_values(self):
        """Return (RTT score, mu, t0)."""
        score = self.opt_score() / self.total_leaves
        mu = self.opt_mu
        t0 = self.opt_y / self.opt_mu
        return score, mu, t0

    def report_score(self):
        """Return the RTT score, mu and t0 as a tab-separated string."""
        return "RTT=%s\tmu=%s\tt0=%s" % (
            str(self.opt_score() / self.total_leaves),
            str(self.opt_mu),
            str(self.opt_y / self.opt_mu),
        )
468
+
469
+
470
class OGR_Tree(Tree_extend):
    """Outgroup rerooting (OGR = outgroup reroot, hence the name).

    This rooting method handles the case where there are multiple outgroups
    that are not monophyletic: it looks for the rooting position that
    maximizes the triplet score of the specified outgroups.
    """

    def __init__(self, outgroups, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=stderr):
        # ``stderr`` is the name imported from ``sys``; the ``sys`` module
        # itself is not bound by the visible imports.
        super(OGR_Tree, self).__init__(ddpTree, tree_file, schema)
        self.logger = new_logger("OGR_Tree_" + str(logger_id), myStream=logger_stream)

        leaves = list(self.ddpTree.traverse_leaves())
        # Build the lookup set once instead of once per leaf.
        outgroup_labels = set(outgroups)
        self.OGs = {leaf.label for leaf in leaves if leaf.label in outgroup_labels}
        self.nOGs = len(self.OGs)
        self.nIGs = len(leaves) - self.nOGs
        self.max_nTrpls = self.nIGs * self.nOGs * (self.nOGs - 1) / 2 + self.nOGs * self.nIGs * (self.nIGs - 1) / 2
        self.reset()

    def reset(self):
        """Forget the best edge found so far."""
        self.opt_root = self.ddpTree.root
        self.opt_nTrpls = 0
        # Initialised here like in the other rooting classes, so the attribute
        # exists even when no edge improves on the current root.
        self.opt_x = 0

    def Node_init(self, node, nTrpl_in=0, nTrpl_out=0, nOGs=0, nIGs=0):
        """Attach the per-node counters used by both passes."""
        node.nTrpl_in = nTrpl_in
        node.nTrpl_out = nTrpl_out
        node.nOGs = nOGs
        node.nIGs = nIGs

    def Opt_function(self, node):
        """Keep ``node`` if its edge has more triplets than the best so far."""
        curr_nTrpls = node.nTrpl_in + node.nTrpl_out
        if curr_nTrpls > self.opt_nTrpls:
            self.opt_nTrpls = curr_nTrpls
            self.opt_root = node
            # NOTE: this method does not consider branch length, the *middle
            # point* of the edge is just arbitrarily chosen
            self.opt_x = node.edge_length / 2

    def bUp_update(self, node):
        """Post-order step: count outgroup/ingroup leaves and triplets below ``node``."""
        if node.is_leaf():
            node.nOGs = 1 if node.label in self.OGs else 0
            node.nIGs = 1 if node.nOGs == 0 else 0
        else:
            C = node.child_nodes()

            node.nOGs = sum([c.nOGs for c in C])
            node.nIGs = sum([c.nIGs for c in C])

            node.nTrpl_in = sum([c.nTrpl_in for c in C])

            # every unordered pair of children contributes triplets
            for i, c1 in enumerate(C):
                for c2 in C[i + 1:]:
                    IG_trpls = c1.nIGs * c2.nIGs * (self.nOGs - node.nOGs)
                    OG_trpls = c1.nOGs * c2.nOGs * (self.nIGs - node.nIGs)
                    node.nTrpl_in += IG_trpls + OG_trpls

    def tDown_update(self, node, opt_function):
        """Pre-order step: compute ``nTrpl_out`` for each child and score its edge."""
        C = node.child_nodes()

        for child in C:
            C1 = [c for c in C if c is not child]
            child.nTrpl_out = node.nTrpl_out

            for i, c1 in enumerate(C1):
                child.nTrpl_out += c1.nTrpl_in
                child.nTrpl_out += (self.nIGs - node.nIGs) * c1.nIGs * child.nOGs
                child.nTrpl_out += (self.nOGs - node.nOGs) * c1.nOGs * child.nIGs

                for c2 in C1[i + 1:]:
                    IG_trpls = c1.nIGs * c2.nIGs * child.nOGs
                    OG_trpls = c1.nOGs * c2.nOGs * child.nIGs

                    child.nTrpl_out += IG_trpls + OG_trpls

            opt_function(child)

    def prepare_root(self):
        """Nothing to prepare for this method."""
        pass

    def opt_score(self):
        """Return the best triplet count divided by the maximum, or None when the maximum is 0."""
        return self.opt_nTrpls / float(self.max_nTrpls) if self.max_nTrpls != 0 else None

    def report_score(self):
        """Return the triplet score as text; warn when it could not be computed."""
        myScore = self.opt_score()
        if myScore is None:
            self.logger.warning("OG rooting failed because the tree has no outgroup")
        return "Triplet score: " + str(myScore)
554
+
555
+
556
class MPR_Tree(Tree_extend):
    """Midpoint rerooting (mpr = mid point reroot, hence the name)."""

    def __init__(self, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=stderr):
        # ``stderr`` is the name imported from ``sys``; the ``sys`` module
        # itself is not bound by the visible imports.
        super(MPR_Tree, self).__init__(ddpTree, tree_file, schema)
        self.logger = new_logger("MPR_Tree_" + str(logger_id), myStream=logger_stream)
        self.reset()

    def reset(self):
        """Forget the best edge found so far."""
        self.max_distance = -1
        self.opt_root = self.ddpTree.root
        self.opt_x = 0

    def Node_init(self, node, max_in=None, max_out=-1):
        """Attach ``max_in`` (defaults to ``[0, 0]``) and ``max_out`` to ``node``."""
        node.max_in = max_in if max_in else [0, 0]
        node.max_out = max_out

    def Opt_function(self, node):
        """Keep ``node`` if the longest path through its edge beats the best so far.

        The candidate is accepted only when the midpoint offset ``x`` falls
        on the edge, i.e. ``0 <= x <= node.edge_length``.
        """
        m = max(node.max_in)
        curr_max_distance = m + node.max_out
        x = (node.max_out - m) / 2
        if curr_max_distance > self.max_distance and 0 <= x <= node.edge_length:
            self.max_distance = curr_max_distance
            self.opt_x = x
            self.opt_root = node

    def bUp_update(self, node):
        """Post-order step: one ``max_in`` entry per child (leaves keep their initial value)."""
        if not node.is_leaf():
            node.max_in = [max(child.max_in) + child.edge_length for child in node.child_nodes()]

    def tDown_update(self, node, opt_function):
        """Pre-order step: compute ``max_out`` for each child and score its edge."""
        for child_idx, child in enumerate(node.child_nodes()):
            # longest distance leaving this child: either upwards through the
            # parent, or down into any of the sibling subtrees
            siblings_in = [d for k, d in enumerate(node.max_in) if k != child_idx]
            child.max_out = max([node.max_out] + siblings_in) + child.edge_length
            opt_function(child)

    def prepare_root(self):
        """Nothing to prepare for this method."""
        pass

    def compute_threshold(self, k=3.5):
        """Thresholding is not supported for midpoint rooting; warn and return 0."""
        self.logger.warning("Trying to compute threshold for MPR_Tree, which is not supported.")
        return 0

    def compute_threhold(self, k=3.5):
        """Misspelt historical name, kept so existing callers keep working."""
        return self.compute_threshold(k=k)

    def opt_score(self):
        """Return half of the longest distance found."""
        return self.max_distance / 2

    def report_score(self):
        """Return the score as text."""
        return "Tree height: " + str(self.opt_score())
607
+
608
+
609
class minVAR_Base_Tree(Tree_extend):
    """Base helper for VAR rerooting (hence the name).

    Subclasses implement ``Opt_function(node, a, b, c)``, which receives the
    three coefficients returned by ``Update_var`` for the edge above ``node``.
    """

    def __init__(self, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=stderr):
        # ``stderr`` is the name imported from ``sys``; the ``sys`` module
        # itself is not bound by the visible imports.
        super(minVAR_Base_Tree, self).__init__(ddpTree, tree_file, schema)
        self.logger = new_logger("MinVar_Tree_" + str(logger_id), myStream=logger_stream)
        self.reset()

    def reset(self):
        """Forget the best edge found so far."""
        self.minVAR = None
        self.opt_root = self.ddpTree.root
        self.opt_x = 0

    def Node_init(self, node, nleaf=1, sum_in=0, sum_total=0, var=-1):
        """Attach the per-node accumulators used by both passes."""
        node.sum_in = sum_in
        node.sum_total = sum_total
        node.nleaf = nleaf
        node.var = var

    def Opt_function(self, node, a, b, c):
        """Placeholder; subclasses score the edge above ``node``."""
        self.logger.info("Abstract method! Should never be called")

    def compute_dRoot_VAR(self):
        """Store on the root the variance of the root-to-leaf distances."""
        cumm = {'ssq': 0, 'sum': 0}

        def compute_dRoot(node, cumm_l):
            if node.is_leaf():
                cumm['ssq'] += cumm_l ** 2
                cumm['sum'] += cumm_l
            else:
                for child in node.child_nodes():
                    compute_dRoot(child, cumm_l + child.edge_length)

        compute_dRoot(self.get_root(), 0)
        N = self.get_root().nleaf
        # mean of squares minus square of the mean
        root_var = cumm['ssq'] / N - (cumm['sum'] / N) ** 2
        self.get_root().var = root_var

    def bUp_update(self, node):
        """Post-order step: fill ``nleaf`` and ``sum_in`` from the children."""
        if node.is_leaf():
            node.nleaf = 1
            node.sum_in = 0
        else:
            node.nleaf = 0
            node.sum_in = 0
            for child in node.child_nodes():
                node.nleaf += child.nleaf
                node.sum_in += child.sum_in + child.nleaf * child.edge_length

    def Update_var(self, child, node, edge_length):
        """Return the coefficients ``(a, b, c)`` for the edge ``node`` -> ``child``.

        As a side effect ``child.var`` is set to the quadratic
        ``a*x*x + b*x + c`` evaluated at ``x = edge_length``.
        """
        alpha = 2 * (node.sum_total - 2 * (child.sum_in + child.nleaf * edge_length)) / self.total_leaves
        beta = 1 - 2 * float(child.nleaf) / self.total_leaves
        a = 1 - beta * beta
        b = alpha - 2 * node.sum_total * beta / self.total_leaves
        c = node.var
        child.var = a * edge_length * edge_length + b * edge_length + c
        return a, b, c

    def tDown_update(self, node, opt_function):
        """Pre-order step: compute ``sum_total`` for each child and score its edge."""
        for child in node.child_nodes():
            child.sum_total = node.sum_total + (self.total_leaves - 2 * child.nleaf) * child.edge_length
            a, b, c = self.Update_var(child, node, child.edge_length)
            opt_function(child, a, b, c)

    def prepare_root(self):
        """Compute the totals at the current root before the top-down pass."""
        root = self.get_root()
        root.sum_total = root.sum_in
        self.compute_dRoot_VAR()
        self.total_leaves = root.nleaf

    def opt_score(self):
        """Return the best value found so far (None before any edge qualified)."""
        return self.minVAR

    def report_score(self):
        """Return the score as text."""
        return "MinVar score: " + str(self.opt_score())
683
+
684
+
685
class MVDF_Tree(minVAR_Base_Tree):
    """VAR rerooting + deepest node + factorization."""

    def __init__(self, ddpTree=None, tree_file=None, schema="newick"):
        super(MVDF_Tree, self).__init__(ddpTree, tree_file, schema)
        self.deep_node = None

    def reset(self):
        """Forget the best edge and the deepest node found so far."""
        super(MVDF_Tree, self).reset()
        self.deep_node = None

    def Opt_function(self, node, a, b, c):
        """Score the edge above ``node`` given the quadratic ``a*x*x + b*x + c``.

        The vertex ``x = -b / (2a)`` is only considered when it lies on the
        edge.  The value at the vertex is divided by ``f * (1 - f)`` where
        ``f`` is the fraction of leaves below ``node``.  An edge replaces the
        current optimum when its lower-variance endpoint beats the stored
        ``deep_node``, or when it shares that endpoint and has a lower score.
        """
        x = -b / (2 * a)
        if 0 <= x <= node.edge_length:
            factor = float(node.nleaf) / self.total_leaves
            factor = factor * (1 - factor)
            curr_minVAR = (a * x * x + b * x + c) / factor

            # The rest of this file walks upwards with ``.parent``; use the
            # same attribute here.
            parent = node.parent
            if node.var < parent.var:
                deep_node = node
            else:
                deep_node = parent

            updateNeed = False
            if (self.deep_node is None) or (deep_node.var < self.deep_node.var):
                self.deep_node = deep_node
                self.minVAR = curr_minVAR
                updateNeed = True
            elif (self.deep_node is deep_node) and (curr_minVAR < self.minVAR):
                self.minVAR = curr_minVAR
                updateNeed = True

            if updateNeed:
                self.opt_root = node
                self.opt_x = node.edge_length - x

    def compute_threshold(self, k=3.5):
        """Return ``mean + k * std`` of the root-to-leaf distances at the optimum.

        Should be called only AFTER the MV root was found.
        """
        # Imported locally: the visible imports never import ``math`` by name.
        import math

        mean = (self.opt_root.sum_total - self.opt_x *
                (self.total_leaves - 2 * self.opt_root.nleaf)) / self.total_leaves
        factor = float(self.opt_root.nleaf) / self.total_leaves
        factor = factor * (1 - factor)
        # undo the factorization applied in Opt_function
        rootVar = self.minVAR * factor
        self.logger.info(mean)
        self.logger.info(rootVar)
        std = math.sqrt(rootVar)
        return mean + k * std
737
+
738
+
739
class MVD0_Tree(minVAR_Base_Tree):
    """VAR rerooting + deepest node + no factorization."""

    def __init__(self, ddpTree=None, tree_file=None, schema="newick"):
        super(MVD0_Tree, self).__init__(ddpTree, tree_file, schema)
        self.deep_node = None

    def reset(self):
        """Forget the best edge and the deepest node found so far."""
        super(MVD0_Tree, self).reset()
        self.deep_node = None

    def Opt_function(self, node, a, b, c):
        """Score the edge above ``node`` given the quadratic ``a*x*x + b*x + c``.

        The vertex ``x = -b / (2a)`` is only considered when it lies on the
        edge.  An edge replaces the current optimum when its lower-variance
        endpoint beats the stored ``deep_node``, or when it shares that
        endpoint and has a lower value at the vertex.
        """
        x = -b / (2 * a)
        if 0 <= x <= node.edge_length:
            curr_minVAR = a * x * x + b * x + c

            # The rest of this file walks upwards with ``.parent``; use the
            # same attribute here.
            parent = node.parent
            if node.var < parent.var:
                deep_node = node
            else:
                deep_node = parent

            updateNeed = False
            if (self.deep_node is None) or (deep_node.var < self.deep_node.var):
                self.deep_node = deep_node
                self.minVAR = curr_minVAR
                updateNeed = True
            elif (self.deep_node is deep_node) and (curr_minVAR < self.minVAR):
                self.minVAR = curr_minVAR
                updateNeed = True

            if updateNeed:
                self.opt_root = node
                self.opt_x = node.edge_length - x

    def compute_threshold(self, k=3.5):
        """Return ``mean + k * std`` of the root-to-leaf distances at the optimum.

        Should be called only AFTER the MV root was found.
        """
        # Imported locally: the visible imports never import ``math`` by name.
        import math

        mean = (self.opt_root.sum_total - self.opt_x *
                (self.total_leaves - 2 * self.opt_root.nleaf)) / self.total_leaves
        self.logger.info(mean)
        self.logger.info(self.minVAR)
        std = math.sqrt(self.minVAR)
        return mean + k * std
785
+
786
+
787
class MV0F_Tree(minVAR_Base_Tree):
    """Supportive class to implement VAR-reroot + no deepest node + factorization.

    The variance of every candidate is divided by ``p * (1 - p)``, where ``p``
    is the fraction of all leaves found below the candidate's node, before
    candidates are compared.
    """

    def Opt_function(self, node, a, b, c):
        """Consider the edge above ``node`` as a location for the root.

        ``a``, ``b`` and ``c`` are the coefficients of the quadratic
        ``a*x*x + b*x + c`` evaluated along the edge. Its vertex
        ``x = -b / (2a)`` is a candidate only when it lies within
        ``[0, node.edge_length]``.
        """
        x = -b / (2 * a)
        if not 0 <= x <= node.edge_length:
            return

        fraction = float(node.nleaf) / self.total_leaves
        factor = fraction * (1 - fraction)
        curr_minVAR = (a * x * x + b * x + c) / factor

        if self.minVAR is None or curr_minVAR < self.minVAR:
            self.minVAR = curr_minVAR
            self.opt_root = node
            self.opt_x = node.edge_length - x

    def compute_threshold(self, k=3.5):
        """Return ``mean + k * std`` for the currently selected root position.

        Should be called only AFTER the MV root was found, because it reads
        ``self.opt_root``, ``self.opt_x`` and ``self.minVAR``.

        Args:
            k: number of standard deviations added on top of the mean.
        """
        mean = (self.opt_root.sum_total - self.opt_x *
                (self.total_leaves - 2 * self.opt_root.nleaf)) / self.total_leaves

        # minVAR was stored divided by p * (1 - p) in Opt_function; multiply
        # the same factor back to recover the plain variance at the root.
        fraction = float(self.opt_root.nleaf) / self.total_leaves
        rootVar = self.minVAR * (fraction * (1 - fraction))

        self.logger.info(mean)
        self.logger.info(rootVar)
        # Rounding can leave a variance marginally below zero; math.sqrt
        # would then raise ValueError, so clamp at zero.
        std = math.sqrt(max(rootVar, 0.0))
        return mean + k * std
820
+
821
+
822
class MV00_Tree(minVAR_Base_Tree):
    """Supportive class to implement VAR-reroot + no deepest node + no factorization.

    The candidate with the smallest value of the quadratic is kept as the
    root position.
    """

    def Opt_function(self, node, a, b, c):
        """Consider the edge above ``node`` as a location for the root.

        ``a``, ``b`` and ``c`` are the coefficients of the quadratic
        ``a*x*x + b*x + c`` evaluated along the edge. Its vertex
        ``x = -b / (2a)`` is a candidate only when it lies within
        ``[0, node.edge_length]``.
        """
        x = -b / (2 * a)
        if not 0 <= x <= node.edge_length:
            return

        curr_minVAR = a * x * x + b * x + c
        if self.minVAR is None or curr_minVAR < self.minVAR:
            self.minVAR = curr_minVAR
            self.opt_root = node
            self.opt_x = node.edge_length - x

    def compute_threshold(self, k=3.5):
        """Return ``mean + k * std`` for the currently selected root position.

        Should be called only AFTER the MV root was found, because it reads
        ``self.opt_root``, ``self.opt_x`` and ``self.minVAR``.

        Args:
            k: number of standard deviations added on top of the mean.
        """
        mean = (self.opt_root.sum_total - self.opt_x *
                (self.total_leaves - 2 * self.opt_root.nleaf)) / self.total_leaves
        self.logger.info(mean)
        self.logger.info(self.minVAR)
        # Rounding can leave a variance marginally below zero; math.sqrt
        # would then raise ValueError, so clamp at zero.
        std = math.sqrt(max(self.minVAR, 0.0))
        return mean + k * std
844
+
845
+
846
class MBR_Tree(Tree_extend):
    """Supportive class to implement midpoint balance root.

    Each edge may carry a "balance point" (see ``Opt_function``). The balance
    points are spliced into a copy of the tree (``build_balance_tree``), and
    that copy is handed to ``MPR_Tree`` to pick the final root (``find_root``).
    """

    def __init__(self, ddpTree=None, tree_file=None, schema="newick"):
        super(MBR_Tree, self).__init__(ddpTree, tree_file, schema)

        self.BPs = []  # BPs : balance points, as (node, x, mean) tuples
        self.opt_root = self.ddpTree.root
        self.opt_x = 0

    def Node_init(self, node, nleaf=1, sum_in=0, sum_out=-1):
        """Initialise the per-node bookkeeping fields on ``node``.

        The fields belong to the node: every other method of this class reads
        ``node.nleaf``, ``node.sum_in`` and ``node.sum_out``. The previous
        implementation wrote them onto the tree wrapper (``self``) and ignored
        ``node`` entirely.
        """
        node.nleaf = nleaf
        node.sum_in = sum_in
        node.sum_out = sum_out

    def Opt_function(self, node):
        """Locate the balance point on the edge above ``node``, if there is one.

        ``mean_in`` is ``sum_in`` averaged over the leaves below ``node`` and
        ``mean_out`` is ``sum_out`` averaged over all remaining leaves. The
        point at distance ``x = (mean_out - mean_in) / 2`` is recorded only
        when ``x`` falls within ``[0, node.edge_length]``; otherwise
        ``node.x`` and ``node.mean`` are set to ``None``.
        """
        nleaf = node.nleaf
        mean_in = node.sum_in / nleaf
        mean_out = node.sum_out / (self.total_leaves - nleaf)
        x = (mean_out - mean_in) / 2

        if 0 <= x <= node.edge_length:
            self.BPs.append((node, x, mean_in + x))
            node.x = x
            node.mean = mean_in + x
        else:
            node.x = None
            node.mean = None

    def bUp_update(self, node):
        """Compute ``nleaf`` and ``sum_in`` of ``node`` from its children."""
        node.sum_in = 0
        if node.is_leaf():
            node.nleaf = 1
            return

        node.nleaf = 0
        for child in node.child_nodes():
            node.nleaf += child.nleaf
            node.sum_in += child.sum_in + child.nleaf * child.edge_length

    def tDown_update(self, node, opt_function):
        """Compute ``sum_out`` for each child of ``node``, then evaluate it."""
        for child in node.child_nodes():
            child.sum_out = (node.sum_out + node.sum_in + child.edge_length *
                             (self.total_leaves - 2 * child.nleaf) - child.sum_in)
            opt_function(child)

    def prepare_root(self):
        """Seed the root's fields ahead of the top-down pass."""
        root = self.get_root()
        root.sum_out = 0
        self.total_leaves = root.nleaf
        root.x = None
        root.mean = None
        # The top-down pass that follows appends to BPs; start from an empty
        # list so that repeated passes do not accumulate duplicate entries.
        self.BPs = []

    def list_balance_points(self):
        """Run the update passes and log every balance point that was found."""
        self.Topdown_label()
        self.Bottomup_update()
        self.prepare_root()
        self.Topdown_update()

        # Leaves and internal nodes are reported in the same format.
        for (node, x, mean) in self.BPs:
            self.logger.info(node.label + "\t" + str(x) + "\t" + str(mean))

    def build_balance_tree(self):
        """Build ``self.balance_tree`` from the balance points of the tree.

        The result is a copy of the tree (``self.ddpTree.extract_tree()``) in
        which clades without any balance point are removed and every balance
        point becomes a node of type ``"bp"`` with a ``"dm"`` (dummy) leaf
        hanging from it. Untouched nodes are tagged ``"real"``.
        """
        self.Topdown_label()  # keep this step for now for debugging purpose
        self.Bottomup_update()
        self.prepare_root()
        self.Topdown_update()

        self.balance_tree = self.ddpTree.extract_tree()

        # bottom up pruning
        for node in self.balance_tree.traverse_postorder():
            node.type = "real"
            node.BPbelow = False

            for ch in node.child_nodes():
                source = ch.extraction_source
                has_bp = source.x is not None

                if ch.BPbelow or has_bp:
                    node.BPbelow = True

                if ch.BPbelow and not has_bp:
                    # Balance points exist deeper down but none on this edge:
                    # the child stays attached as it is.
                    continue

                edgelen = ch.edge_length
                node.remove_child(ch)
                if not has_bp:
                    # Nothing on this edge and nothing below it: the whole
                    # clade under ch is dropped.
                    continue

                # Add a new node p at the balance point and make it a child of
                # node (edge length ch.edge_length - x); give p a dummy child
                # ch1 (edge length mean).
                p = Node()
                ch1 = Node()
                p.type = "bp"  # bp: balance-point
                # link p to the original tree (for later use after finding midpoint)
                p.ref_child = source
                ch1.type = "dm"  # dm: dummy

                node.add_child(p)
                if ch.BPbelow:
                    # ch still leads to deeper balance points: keep it, hung
                    # below p with edge length x.
                    p.add_child(ch)
                    ch.edge_length = source.x
                p.add_child(ch1)

                p.edge_length = edgelen - source.x
                ch1.edge_length = source.mean

        # topdown pruning: skip over a chain of single-child nodes at the top
        node = self.balance_tree.root
        nchild = len(node.child_nodes())
        while nchild == 1:
            temp = node
            node = node.child_nodes()[0]
            temp.remove_child(node)
            if node.type == "dm":
                # Never promote a dummy to the root; stay on its parent.
                node = temp
                break
            nchild = len(node.child_nodes())

        self.balance_tree.root = node
        self.balance_tree.root.edge_length = None

    def find_root(self):
        """Pick the root by running ``MPR_Tree`` on the balance tree.

        The position found in the balance tree is mapped back to the original
        tree and stored in ``self.opt_root`` / ``self.opt_x``. If a dummy node
        is returned, both are left unchanged and a message is logged.
        """
        self.build_balance_tree()
        mptre = MPR_Tree(ddpTree=self.balance_tree)
        mptre.tree_as_newick()
        mptre.find_root()

        self.logger.info(mptre.opt_root.type)

        if mptre.opt_root.type == "bp":
            self.opt_root = mptre.opt_root.ref_child
            self.opt_x = mptre.opt_root.ref_child.x + mptre.opt_x
        elif mptre.opt_root.type == "dm":
            self.logger.info("Hmm... Is it possible that a dummy was found as the opt_root?")
        else:
            self.opt_root = mptre.opt_root.extraction_source
            self.opt_x = mptre.opt_x

        self.logger.info(self.opt_root.label)
        self.logger.info(self.opt_x)
1037
+
1038
+
1039
def FastRoot(args):
    """Root every tree read from ``args.input`` and write it to ``args.outfile``.

    Args:
        args: parsed command-line options, either an ``argparse.Namespace`` or
            the equivalent ``dict`` (for example ``vars(namespace)``). The
            options read are ``infofile``, ``method``, ``outgroups``,
            ``smplTimes``, ``maxIter``, ``input``, ``schema`` and ``outfile``.

    Raises:
        SystemExit: with status 1 when the rooting method is unknown, or when
            RTT / OG rooting is requested without sampling times / outgroups.
    """
    # The options are read as attributes below; accept a plain dict as well so
    # that callers passing vars(parser.parse_args()) keep working.
    if isinstance(args, dict):
        args = argparse.Namespace(**args)

    stream = args.infofile if args.infofile else stderr
    logger = new_logger(__name__, myStream=stream)
    logger.info("Running " + fastroot.PROGRAM_NAME + " version " + fastroot.PROGRAM_VERSION)
    new_argv = re.sub(' +', ' ', " ".join(argv).replace("\n", " "))
    logger.info(fastroot.PROGRAM_NAME + " was called as follows: " + new_argv)

    METHOD2FUNC = {'MP': MPR_Tree, 'MV': MV00_Tree, 'OG': OGR_Tree, 'RTT': RTT_Tree}
    METHOD2DESC = {'MP': "Midpoint", 'MV': "MinVar", 'OG': "Outgroup", 'RTT': "Root-to-Tip"}
    method = args.method.upper()

    # reading outgroups
    if args.outgroups:
        if method != 'OG':
            method = 'OG'
            logger.warning("The rooting method is set to outgroup rooting (OG) due to the presence of outgroups")
        if path.exists(args.outgroups):
            # One outgroup name per line; blank lines carry no name. The file
            # is closed as soon as it has been read.
            with open(args.outgroups, 'r') as og_file:
                OGs = [name for name in (line.strip() for line in og_file) if name]
        else:
            OGs = args.outgroups.split()
    else:
        OGs = None

    # reading sampling times
    if args.smplTimes:
        smplTimes = {}
        for line in args.smplTimes:
            fields = line.split()
            if not fields:
                # Tolerate blank lines instead of failing on the unpack below.
                continue
            sp, t = fields
            smplTimes[sp] = float(t)
        if method != 'RTT':
            method = 'RTT'
            logger.warning(
                "The rooting method is set to root-to-tip rooting (RTT) due to the presence of sampling times")

    # Invalid invocations terminate with a non-zero status so that calling
    # scripts can detect the failure.
    if method == 'RTT' and args.smplTimes is None:
        logger.error("Need sampling times for root-to-tip rooting")
        raise SystemExit(1)
    elif method == 'OG' and args.outgroups is None:
        logger.error("Need outgroups for outgroup rooting")
        raise SystemExit(1)

    # Explicit check rather than ``assert``: assertions are stripped under -O.
    if method not in METHOD2FUNC:
        logger.error("Invalid method! Valid options: MP for midpoint, MV for minVAR, OG for outgroups, RTT for root-to-tip")
        raise SystemExit(1)
    logger.info("Rooting Method: " + METHOD2DESC[method] + " Rooting")

    if method == 'RTT':
        if args.maxIter and args.maxIter < 1000:
            logger.warning(
                "Invalid number of maximum iterations (-x). Must be at least 1000. Set back to 1000 by default.")
        maxIter = max(1000, args.maxIter) if args.maxIter else 1000
        logger.info("Maximum iterations: " + str(maxIter))
    elif args.maxIter is not None:
        logger.warning("The maximum number of iterations (-x) is only used with root-to-tip rooting (RTT)")

    # read and root each tree (one tree per input line)
    schema = args.schema.lower()
    for i, line in enumerate(args.input):
        tree = read_tree(line, schema=schema)
        if method == 'OG':
            a_tree = OGR_Tree(OGs, ddpTree=tree, logger_id=i + 1, logger_stream=stream)
        elif method == 'RTT':
            a_tree = RTT_Tree(smplTimes, ddpTree=tree, logger_id=i + 1, logger_stream=stream, maxIter=maxIter)
        else:
            a_tree = METHOD2FUNC[method](ddpTree=tree, logger_id=i + 1, logger_stream=stream)

        a_tree.Reroot()
        logger.info("Tree " + str(i + 1) + " " + a_tree.report_score())
        a_tree.tree_as_newick(outstream=args.outfile)
1108
+
1109
+
1110
if __name__ == '__main__':

    # Command-line entry point: parse the options and root the input trees.
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', required=False, type=argparse.FileType('r'), default=stdin, help="Input File (default is STDIN)")
    parser.add_argument('-m', '--method', required=False, type=str, default="MV", help="Method (MP for midpoint, MV for minVAR, OG for outgroup, RTT for root-to-tip) (default is MV)")
    parser.add_argument('-g', '--outgroups', required=False, type=str, help="Specify the outgroups. If specifying a list of outgroups, put them between quotes (i.e. \"). Otherwise, specifying a file which containts all the outgroups. Can only be used with -m OG")
    parser.add_argument('-t', '--smplTimes', required=False, type=argparse.FileType('r'), help="The file containing the sampling times at leaves; to be used with -m RTT")
    parser.add_argument('-o', '--outfile', required=False, type=argparse.FileType('w'), default=stdout, help="Output File (default is STDOUT)")
    parser.add_argument('-s', '--schema', required=False, type=str, default="newick", help="Schema of your input treefile (default is newick)")
    parser.add_argument('-f', '--infofile', required=False, type=argparse.FileType('w'), default=None, help="Save all the logging to this file. Default: print to stderr")
    parser.add_argument("-x", "--maxIter", required=False, type=int, default=None, help="Maximum number of iterations to run cvxopt")

    # FastRoot reads its options as attributes (args.method, args.input, ...),
    # so hand over the Namespace itself; wrapping it in vars() yields a dict
    # and the first attribute access fails with AttributeError.
    args = parser.parse_args()
    FastRoot(args)