treesak 1.53.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- TreeSAK/ALE.py +63 -0
- TreeSAK/ALE1.py +268 -0
- TreeSAK/ALE2.py +168 -0
- TreeSAK/ALE2RTC.py +30 -0
- TreeSAK/ALE3.py +205 -0
- TreeSAK/ALE4.py +636 -0
- TreeSAK/ALE5.py +210 -0
- TreeSAK/ALE6.py +401 -0
- TreeSAK/ALE7.py +126 -0
- TreeSAK/ALE_backup.py +1081 -0
- TreeSAK/AssessCVG.py +128 -0
- TreeSAK/AssessMarker.py +306 -0
- TreeSAK/AssessMarkerDeltaLL.py +257 -0
- TreeSAK/AssessMarkerPA.py +317 -0
- TreeSAK/AssessPB.py +113 -0
- TreeSAK/BMGE.jar +0 -0
- TreeSAK/BMGE.py +49 -0
- TreeSAK/C60SR4.nex +127 -0
- TreeSAK/CompareMCMC.py +138 -0
- TreeSAK/ConcateMSA.py +111 -0
- TreeSAK/ConvertMSA.py +135 -0
- TreeSAK/Dir.rb +82 -0
- TreeSAK/ExtractMarkerSeq.py +263 -0
- TreeSAK/FastRoot.py +1175 -0
- TreeSAK/FastRoot_backup.py +1122 -0
- TreeSAK/FigTree.py +34 -0
- TreeSAK/GTDB_tree.py +76 -0
- TreeSAK/GeneTree.py +142 -0
- TreeSAK/KEGG_Luo17.py +807 -0
- TreeSAK/LcaToLeaves.py +66 -0
- TreeSAK/MarkerRef2Tree.py +616 -0
- TreeSAK/MarkerRef2Tree_backup.py +628 -0
- TreeSAK/MarkerSeq2Tree.py +299 -0
- TreeSAK/MarkerSeq2Tree_backup.py +259 -0
- TreeSAK/ModifyTopo.py +116 -0
- TreeSAK/Newick_tree_plotter.py +79 -0
- TreeSAK/OMA.py +170 -0
- TreeSAK/OMA2.py +212 -0
- TreeSAK/OneLineAln.py +50 -0
- TreeSAK/PB.py +155 -0
- TreeSAK/PMSF.py +115 -0
- TreeSAK/PhyloBiAssoc.R +84 -0
- TreeSAK/PhyloBiAssoc.py +167 -0
- TreeSAK/PlotMCMC.py +41 -0
- TreeSAK/PlotMcmcNode.py +152 -0
- TreeSAK/PlotMcmcNode_old.py +252 -0
- TreeSAK/RootTree.py +101 -0
- TreeSAK/RootTreeGTDB.py +371 -0
- TreeSAK/RootTreeGTDB214.py +288 -0
- TreeSAK/RootTreeGTDB220.py +300 -0
- TreeSAK/SequentialDating.py +16 -0
- TreeSAK/SingleAleHGT.py +157 -0
- TreeSAK/SingleLinePhy.py +50 -0
- TreeSAK/SliceMSA.py +142 -0
- TreeSAK/SplitScore.py +21 -0
- TreeSAK/SplitScore1.py +177 -0
- TreeSAK/SplitScore1OMA.py +148 -0
- TreeSAK/SplitScore2.py +608 -0
- TreeSAK/TaxaCountStats.R +256 -0
- TreeSAK/TaxonTree.py +47 -0
- TreeSAK/TreeSAK_config.py +32 -0
- TreeSAK/VERSION +164 -0
- TreeSAK/VisHPD95.R +45 -0
- TreeSAK/VisHPD95.py +200 -0
- TreeSAK/__init__.py +0 -0
- TreeSAK/ale_parser.py +74 -0
- TreeSAK/ale_splitter.py +63 -0
- TreeSAK/alignment_pruner.pl +1471 -0
- TreeSAK/assessOG.py +45 -0
- TreeSAK/batch_itol.py +171 -0
- TreeSAK/catfasta2phy.py +140 -0
- TreeSAK/cogTree.py +185 -0
- TreeSAK/compare_trees.R +30 -0
- TreeSAK/compare_trees.py +255 -0
- TreeSAK/dating.py +264 -0
- TreeSAK/dating_ss.py +361 -0
- TreeSAK/deltall.py +82 -0
- TreeSAK/do_rrtc.rb +464 -0
- TreeSAK/fa2phy.py +42 -0
- TreeSAK/filter_rename_ar53.py +118 -0
- TreeSAK/format_leaf_name.py +70 -0
- TreeSAK/gap_stats.py +38 -0
- TreeSAK/get_SCG_tree.py +742 -0
- TreeSAK/get_arCOG_seq.py +97 -0
- TreeSAK/global_functions.py +222 -0
- TreeSAK/gnm_leaves.py +43 -0
- TreeSAK/iTOL.py +791 -0
- TreeSAK/iTOL_gene_tree.py +80 -0
- TreeSAK/itol_msa_stats.py +56 -0
- TreeSAK/keep_highest_rrtc.py +37 -0
- TreeSAK/koTree.py +194 -0
- TreeSAK/label_gene_tree_by_gnm.py +34 -0
- TreeSAK/label_tree.R +75 -0
- TreeSAK/label_tree.py +121 -0
- TreeSAK/mad.py +708 -0
- TreeSAK/mcmc2tree.py +58 -0
- TreeSAK/mcmcTC copy.py +92 -0
- TreeSAK/mcmcTC.py +104 -0
- TreeSAK/mcmctree_vs_reltime.R +44 -0
- TreeSAK/mcmctree_vs_reltime.py +252 -0
- TreeSAK/merge_pdf.py +32 -0
- TreeSAK/pRTC.py +56 -0
- TreeSAK/parse_mcmctree.py +198 -0
- TreeSAK/parse_reltime.py +141 -0
- TreeSAK/phy2fa.py +37 -0
- TreeSAK/plot_distruibution_th.py +165 -0
- TreeSAK/prep_mcmctree_ctl.py +92 -0
- TreeSAK/print_leaves.py +32 -0
- TreeSAK/pruneMSA.py +63 -0
- TreeSAK/recode.py +73 -0
- TreeSAK/remove_bias.R +112 -0
- TreeSAK/rename_leaves.py +78 -0
- TreeSAK/replace_clade.py +55 -0
- TreeSAK/root_with_out_group.py +84 -0
- TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
- TreeSAK/subsample_drep_gnms.py +74 -0
- TreeSAK/subset.py +69 -0
- TreeSAK/subset_tree_stupid_old_way.py +193 -0
- TreeSAK/supertree.py +330 -0
- TreeSAK/tmp_1.py +19 -0
- TreeSAK/tmp_2.py +19 -0
- TreeSAK/tmp_3.py +120 -0
- TreeSAK/tmp_4.py +43 -0
- TreeSAK/tmp_5.py +12 -0
- TreeSAK/weighted_rand.rb +23 -0
- treesak-1.53.3.data/scripts/TreeSAK +955 -0
- treesak-1.53.3.dist-info/LICENSE +674 -0
- treesak-1.53.3.dist-info/METADATA +27 -0
- treesak-1.53.3.dist-info/RECORD +131 -0
- treesak-1.53.3.dist-info/WHEEL +5 -0
- treesak-1.53.3.dist-info/top_level.txt +1 -0
TreeSAK/FastRoot.py
ADDED
|
@@ -0,0 +1,1175 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import sys
|
|
4
|
+
import numpy
|
|
5
|
+
import cvxopt
|
|
6
|
+
import logging
|
|
7
|
+
import argparse
|
|
8
|
+
from numpy import *
|
|
9
|
+
from treeswift import *
|
|
10
|
+
from sys import stdin, stdout, argv, exit, stderr
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
########################################################################################################################
|
|
14
|
+
|
|
15
|
+
PROGRAM_NAME = "FastRoot"
|
|
16
|
+
PROGRAM_AUTHOR = ["Uyen Mai", "Merve Kilic", "Erfan Sayyari", "Siavash Mirarab"]
|
|
17
|
+
PROGRAM_LICENSE = "MIT License"
|
|
18
|
+
PROGRAM_VERSION = "1.5"
|
|
19
|
+
PROGRAM_YEAR = "2017"
|
|
20
|
+
PROGRAM_INSTITUTE = "University of California at San Diego"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def new_logger(myName, myLevel=logging.INFO, myStream=stdout):
    """Return the logger named ``myName`` that writes to ``myStream``.

    The logger is set to ``myLevel``, does not propagate to the root logger
    and formats records as ``LEVEL:name:message``.

    ``logging.getLogger`` returns the same object for the same name, so
    calling this function repeatedly (e.g. once per tree object created with
    the default logger id) used to stack one more handler on the logger each
    time and every message was then printed several times.  A handler is now
    attached only if the logger does not already have a stream handler bound
    to ``myStream``.

    :param myName: name of the logger.
    :param myLevel: logging level applied to the logger.
    :param myStream: file-like object the handler writes to.
    :return: the configured ``logging.Logger``.
    """
    logger = logging.getLogger(myName)
    logger.setLevel(myLevel)
    logger.propagate = False

    already_attached = any(
        isinstance(existing, logging.StreamHandler)
        and getattr(existing, "stream", None) is myStream
        for existing in logger.handlers
    )
    if not already_attached:
        handler = logging.StreamHandler(myStream)
        handler.setFormatter(logging.Formatter('%(levelname)s:%(name)s:%(message)s'))
        logger.addHandler(handler)

    return logger
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
########################################################################################################################
|
|
36
|
+
|
|
37
|
+
'''
|
|
38
|
+
logger = logging.getLogger("Tree_extend.py")
|
|
39
|
+
logger.setLevel(logging.INFO)
|
|
40
|
+
handler = logging.StreamHandler(sys.stdout)
|
|
41
|
+
formatter = logging.Formatter('%(levelname)s:%(name)s:%(message)s')
|
|
42
|
+
handler.setFormatter(formatter)
|
|
43
|
+
logger.addHandler(handler)
|
|
44
|
+
logger.propagate = False
|
|
45
|
+
'''
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class Tree_extend(object):
    """Base class shared by the rooting strategies in this module.

    It wraps a tree object (``self.ddpTree``) and provides node labelling, the
    generic bottom-up / top-down passes, long-branch filtering, re-rooting on
    an edge and a Newick writer.

    Subclasses are expected to provide ``Node_init``, ``bUp_update``,
    ``tDown_update``, ``prepare_root``, ``Opt_function``, ``reset`` and
    ``compute_threshold``, and to maintain ``self.opt_root`` / ``self.opt_x``
    (the node whose parent edge holds the best root, and the position on it).
    """

    def __init__(self, ddpTree=None, tree_file=None, schema="newick"):
        """Wrap ``ddpTree``, or read the tree from ``tree_file`` when given.

        :param ddpTree: an already-loaded tree object.
        :param tree_file: path passed to ``read_tree``; takes precedence.
        :param schema: format name passed to ``read_tree``.
        """
        # Fallback logger so that the abstract methods below can report
        # misuse; subclasses replace it with their own logger.
        self.logger = logging.getLogger("Tree_extend")
        if tree_file:
            self.ddpTree = read_tree(tree_file, schema)
        else:
            self.ddpTree = ddpTree

    def Bottomup_label(self):
        """Name every node ``L<i>`` (leaf) or ``I<i>`` (internal) in post-order."""
        for i, node in enumerate(self.ddpTree.traverse_postorder()):
            node.name = ('L' if node.is_leaf() else 'I') + str(i)

    def Topdown_label(self, label_type="all"):
        """Name nodes in pre-order so that they can be referred to later.

        :param label_type: ``"all"`` renames every node, ``"leaves"`` only
            leaves, ``"internal"`` only internal nodes.  Nodes that are not
            renamed get ``name = label``.
        """
        for i, node in enumerate(self.ddpTree.traverse_preorder()):
            if node.is_leaf():
                prefix = 'L'
                rename = label_type == "all" or label_type == "leaves"
            else:
                prefix = 'I'
                rename = label_type == "all" or label_type == "internal"
            node.name = prefix + str(i) if rename else node.label

    def Bottomup_update(self):
        """Post-order pass: initialise each node, then run ``bUp_update`` on it."""
        for node in self.ddpTree.traverse_postorder():
            self.Node_init(node)
            self.bUp_update(node)

    def Topdown_update(self):
        """Pre-order pass: run ``tDown_update`` on each node with ``Opt_function``."""
        for node in self.ddpTree.traverse_preorder():
            self.tDown_update(node, self.Opt_function)

    def compute_distances(self):
        """Return ``{leaf.name: distance from the root}`` for every leaf.

        Implemented with an explicit stack (children pushed in reverse so the
        visiting order matches a left-to-right recursion) to stay clear of the
        interpreter recursion limit on very deep trees.
        """
        D = {}
        pending = [(self.ddpTree.root, 0)]
        while pending:
            node, dist = pending.pop()
            if node.is_leaf():
                D[node.name] = dist
            else:
                for child in reversed(node.child_nodes()):
                    pending.append((child, dist + child.edge_length))
        return D

    def compute_ingroup_distances(self):
        """Return the leaf distances measured from the most crowded root child.

        The child of the root with the largest ``nleaf`` is selected and the
        distance from that child to each leaf below it is returned as a list.
        The selection loop was already present but its result was ignored in
        favour of a hard-coded ``children[1]``, which also raised
        ``IndexError`` when the root had a single child.

        NOTE(review): ``nleaf`` is only assigned by the minVAR classes in this
        file, so this method presumably requires one of those to have run its
        bottom-up pass first.
        """
        D = []
        crowded_child = None
        maxleaf = -1
        for node in self.ddpTree.root.child_nodes():
            if node.nleaf > maxleaf:
                maxleaf = node.nleaf
                crowded_child = node

        if crowded_child is None:
            # root without children: there is no ingroup to measure
            return D

        pending = [(crowded_child, 0)]
        while pending:
            node, dist = pending.pop()
            if node.is_leaf():
                D.append(dist)
            else:
                for child in reversed(node.child_nodes()):
                    pending.append((child, dist + child.edge_length))
        return D

    def filter_branch(self, threshold=None):
        """Iteratively re-root and prune abnormally long branches.

        After each successful pruning round the optimisation state is reset
        and the tree is re-rooted; the loop stops when a round removes nothing.

        :param threshold: root-to-node distance above which a node is pruned;
            ``None`` lets ``filter_by_threshold`` derive it.
        """
        i = 1
        self.logger.info("Iteration: " + str(i))
        self.Reroot()
        while True:
            if not self.filter_by_threshold(threshold=threshold):
                self.logger.info("I could not remove anything more! I stop here!")
                break
            i += 1
            self.logger.info("Iteration: " + str(i))
            self.reset()
            self.Reroot()

    def filter_by_threshold(self, threshold=None, k=3.5):
        """Prune every node whose distance from the root exceeds ``threshold``.

        Internal nodes that lose all of their children are pruned as well, and
        internal nodes left with a single child are spliced out (the child is
        attached to the grandparent and the two edge lengths are added).

        :param threshold: cut-off distance; when ``None`` it is obtained from
            ``self.compute_threshold(k=k)``.
        :param k: forwarded to ``compute_threshold``.
        :return: ``True`` if at least one node was removed.
        """
        if threshold is None:
            threshold = self.compute_threshold(k=k)

        def _prune(node, cumm_l):
            removed = False
            node.child_removed = False
            # child_nodes() is copied so removals below cannot disturb the loop
            for child in list(node.child_nodes()):
                if _prune(child, cumm_l + child.edge_length):
                    removed = True

            # `.parent` / `num_children()` are what reroot_at_edge uses; the
            # previous `parent_node` / `num_child_nodes()` spellings were
            # inconsistent with it.
            p = node.parent
            if p is None:
                # the root has no parent to be detached from
                return removed

            if (cumm_l > threshold) or (node.child_removed and node.num_children() == 0):
                p.remove_child(node)
                p.child_removed = True
                removed = True
                shown = node.label if node.label is not None else getattr(node, "name", None)
                self.logger.info(str(shown) + " removed")
            elif node.num_children() == 1:
                # splice out a node left with a single child
                e1 = node.edge_length
                child = node.child_nodes()[0]
                e2 = child.edge_length
                p.remove_child(node)
                node.remove_child(child)
                p.add_child(child)
                child.edge_length = e1 + e2
            return removed

        return _prune(self.get_root(), 0)

    def compute_threshold(self, k=3.5):
        """Return the pruning threshold used by ``filter_by_threshold``.

        The base implementation forwards to the historically misspelt
        ``compute_threhold`` so that subclasses overriding either name work.
        """
        return self.compute_threhold(k=k)

    def compute_threhold(self, k=3.5):
        """Abstract placeholder kept under its original (misspelt) name."""
        self.logger.warning("Abstract class! Should never be called")
        return 0

    def reset(self):
        """Abstract: reset the optimisation state before a new search."""
        self.logger.warning("Abstract class! Should never be called")

    def find_root(self):
        """Run the full search: label, bottom-up pass, root setup, top-down pass."""
        self.Topdown_label()  # temporarily included for debugging
        self.Bottomup_update()
        self.prepare_root()
        self.Topdown_update()

    def opt_score(self):
        """Abstract: return the score of the best root found."""
        self.logger.warning("Abstract class! Should never be called")

    def report_score(self):
        """Abstract: return a printable description of the best score."""
        self.logger.warning("Abstract class! Should never be called")

    def Reroot(self):
        """Search for the best root and move the root there if it changed."""
        self.find_root()
        if self.opt_root != self.ddpTree.root:
            self.reroot_at_edge(self.opt_root, self.opt_x)

    def Opt_function(self, node):
        """Abstract: evaluate ``node`` as a candidate root position."""
        self.logger.warning("Abstract method! Should never be called")

    def tree_as_newick(self, outstream=sys.stdout, label_by_name=False):
        """Write the tree to ``outstream`` in Newick format, ending with ``;``.

        :param outstream: text stream with a ``write`` method.
        :param label_by_name: write ``node.name`` instead of ``node.label``.
        """
        self.__write_newick(self.ddpTree.root, outstream, label_by_name=label_by_name)
        outstream.write(";\n")

    def __write_newick(self, node, outstream, label_by_name=False):
        """Recursively write the subtree rooted at ``node``.

        Leaf labels are always passed through ``str``: the previous fallback
        branch repeated the very ``write(node.label)`` call that had just
        failed, so a leaf whose label was not a string raised ``TypeError``.
        """
        if node.is_leaf():
            outstream.write(str(node.name) if label_by_name else str(node.label))
        else:
            outstream.write('(')
            for position, child in enumerate(node.child_nodes()):
                if position:
                    outstream.write(',')
                self.__write_newick(child, outstream, label_by_name=label_by_name)
            outstream.write(')')
            if label_by_name:
                outstream.write(str(node.name))
            elif node.label is not None:
                outstream.write(str(node.label))

        if node.edge_length is not None:
            outstream.write(":" + str(node.edge_length))

    def reroot_at_edge(self, node, length):
        """Place a new root on the edge above ``node``.

        The new root is created ``length`` away from ``node`` and
        ``node.edge_length - length`` away from its former parent; the edges
        on the path to the old root are then reversed one by one.  If the old
        root is left with a single child it is spliced out.

        :return: ``(d2currRoot, br2currRoot)`` — the distance and the number
            of reversed branches between the new and the old root; ``(0, 0)``
            when nothing is done (no node, no parent, or zero length on a
            leaf).
        """
        # validate before touching node.edge_length (the check used to come
        # after the first dereference)
        if node is None:
            return 0, 0
        head = node  # opt_root = v = node
        tail = node.parent  # u, parent of opt_root
        if tail is None:
            return 0, 0

        length1 = node.edge_length - length
        length2 = length
        if (length2 == 0) and head.is_leaf():
            return 0, 0

        new_root = Node()

        tail.remove_child(head)
        new_root.add_child(head)
        head.edge_length = length2

        # remember tail's old parent and edge before tail is re-attached
        p = tail.parent
        l = tail.edge_length

        new_root.add_child(tail)
        tail.edge_length = length1

        br2currRoot = 0
        d2currRoot = length1

        if tail is self.ddpTree.root:
            head = new_root

        # walk up to the old root, turning each parent into a child
        while tail is not self.ddpTree.root:
            q = head.parent
            head = tail
            tail = p
            p = tail.parent

            br2currRoot += 1
            d2currRoot += l

            l1 = tail.edge_length
            tail.remove_child(head)
            # remove_child is expected to have cleared head.parent; restore it
            head.parent = q

            head.add_child(tail)
            tail.edge_length = l
            l = l1

        # out of the loop: tail IS now the old root
        if tail.num_children() == 1:
            # merge the 2 branches of the old root and adjust the branch length
            sis = tail.child_nodes()[0]
            l = sis.edge_length
            tail.remove_child(sis)
            head.add_child(sis)
            sis.edge_length = l + tail.edge_length
            head.remove_child(tail)

        # the old root may be unnamed when no labelling pass has been run
        new_root.name = getattr(self.ddpTree.root, "name", None)
        self.ddpTree.root.name = "OLD"
        self.ddpTree.root = new_root

        return d2currRoot, br2currRoot

    def get_root(self):
        """Return the current root node of the wrapped tree."""
        return self.ddpTree.root
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
class OGR_Tree(Tree_extend):
    """Outgroup re-rooting (OGR = outgroup reroot, hence the name).

    This rooting method handles the case where there are multiple outgroups
    that are not monophyletic: it looks for the rooting position that
    maximises the triplet score of the specified outgroups.
    """

    def __init__(self, outgroups, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=sys.stderr):
        """Record which leaves are outgroups and the maximum triplet count.

        :param outgroups: iterable of leaf labels to treat as outgroups.
        :param ddpTree: an already-loaded tree object.
        :param tree_file: path of a tree file (takes precedence over ddpTree).
        :param schema: format name used when reading ``tree_file``.
        :param logger_id: suffix of the logger name ``OGR_Tree_<id>``.
        :param logger_stream: stream the logger writes to.
        """
        super(OGR_Tree, self).__init__(ddpTree, tree_file, schema)
        self.logger = new_logger("OGR_Tree_" + str(logger_id), myStream=logger_stream)

        # build the lookup set once; it used to be rebuilt for every leaf
        requested = set(outgroups)
        leaves = list(self.ddpTree.traverse_leaves())

        self.OGs = set(leaf.label for leaf in leaves if leaf.label in requested)
        self.nOGs = len(self.OGs)
        self.nIGs = len(leaves) - self.nOGs
        self.max_nTrpls = self.nIGs * self.nOGs * (self.nOGs - 1) / 2 + self.nOGs * self.nIGs * (self.nIGs - 1) / 2
        self.reset()

    def reset(self):
        """Forget the best root found so far."""
        self.opt_root = self.ddpTree.root
        self.opt_nTrpls = 0
        # defined here as in the sibling classes, so the attribute always exists
        self.opt_x = 0

    def Node_init(self, node, nTrpl_in=0, nTrpl_out=0, nOGs=0, nIGs=0):
        """Attach the per-node counters used by the two passes."""
        node.nTrpl_in = nTrpl_in
        node.nTrpl_out = nTrpl_out
        node.nOGs = nOGs
        node.nIGs = nIGs

    def Opt_function(self, node):
        """Keep ``node`` as the best root if it has more triplets than the current best."""
        curr_nTrpls = node.nTrpl_in + node.nTrpl_out
        if curr_nTrpls > self.opt_nTrpls:
            self.opt_nTrpls = curr_nTrpls
            self.opt_root = node
            # NOTE: this method does not consider branch length, the *middle
            # point* of the edge is just arbitrarily chosen
            self.opt_x = node.edge_length / 2

    def bUp_update(self, node):
        """Bottom-up step: count outgroups/ingroups and triplets below ``node``."""
        if node.is_leaf():
            node.nOGs = 1 if node.label in self.OGs else 0
            node.nIGs = 1 if node.nOGs == 0 else 0
            return

        C = node.child_nodes()
        node.nOGs = sum([c.nOGs for c in C])
        node.nIGs = sum([c.nIGs for c in C])
        node.nTrpl_in = sum([c.nTrpl_in for c in C])

        # every unordered pair of children, combined with what lies outside node
        for i, c1 in enumerate(C):
            for c2 in C[i + 1:]:
                IG_trpls = c1.nIGs * c2.nIGs * (self.nOGs - node.nOGs)
                OG_trpls = c1.nOGs * c2.nOGs * (self.nIGs - node.nIGs)
                node.nTrpl_in += IG_trpls + OG_trpls

    def tDown_update(self, node, opt_function):
        """Top-down step: compute ``nTrpl_out`` of each child, then evaluate it."""
        C = node.child_nodes()

        for child in C:
            siblings = [c for c in C if c is not child]
            child.nTrpl_out = node.nTrpl_out

            for i, c1 in enumerate(siblings):
                child.nTrpl_out += c1.nTrpl_in
                child.nTrpl_out += (self.nIGs - node.nIGs) * c1.nIGs * child.nOGs
                child.nTrpl_out += (self.nOGs - node.nOGs) * c1.nOGs * child.nIGs

                for c2 in siblings[i + 1:]:
                    IG_trpls = c1.nIGs * c2.nIGs * child.nOGs
                    OG_trpls = c1.nOGs * c2.nOGs * child.nIGs
                    child.nTrpl_out += IG_trpls + OG_trpls

            opt_function(child)

    def prepare_root(self):
        """Nothing to prepare at the root for this method."""
        pass

    def opt_score(self):
        """Return the best triplet count divided by the maximum, or ``None`` if the maximum is 0."""
        return self.opt_nTrpls / float(self.max_nTrpls) if self.max_nTrpls != 0 else None

    def report_score(self):
        """Return ``"Triplet score: <score>"``; warn when no score is available."""
        myScore = self.opt_score()
        if myScore is None:
            self.logger.warning("OG rooting failed because the tree has no outgroup")
        return "Triplet score: " + str(myScore)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
class MPR_Tree(Tree_extend):
    """Midpoint re-rooting (mpr = mid point reroot, hence the name)."""

    def __init__(self, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=sys.stderr):
        """Wrap the tree, create the ``MPR_Tree_<id>`` logger and reset the search state."""
        super(MPR_Tree, self).__init__(ddpTree, tree_file, schema)
        self.logger = new_logger("MPR_Tree_" + str(logger_id), myStream=logger_stream)
        self.reset()

    def reset(self):
        """Forget the best root found so far."""
        self.max_distance = -1
        self.opt_root = self.ddpTree.root
        self.opt_x = 0

    def Node_init(self, node, max_in=None, max_out=-1):
        """Attach ``max_in`` (defaults to ``[0, 0]``) and ``max_out`` to ``node``."""
        node.max_in = max_in if max_in else [0, 0]
        node.max_out = max_out

    def Opt_function(self, node):
        """Evaluate the edge above ``node`` as the place of the midpoint.

        ``x`` is half the difference between ``max_out`` and the largest
        ``max_in`` entry; the edge is accepted only when ``x`` lies on it and
        the sum of the two distances beats the current best.
        """
        m = max(node.max_in)
        curr_max_distance = m + node.max_out
        x = (node.max_out - m) / 2
        if curr_max_distance > self.max_distance and x >= 0 and x <= node.edge_length:
            self.max_distance = curr_max_distance
            self.opt_x = x
            self.opt_root = node

    def bUp_update(self, node):
        """Bottom-up step: for an internal node, store one ``max_in`` entry per child."""
        if not node.is_leaf():
            node.max_in = [max(child.max_in) + child.edge_length for child in node.child_nodes()]

    def tDown_update(self, node, opt_function):
        """Top-down step: compute ``max_out`` of each child, then evaluate it.

        ``max_out`` of a child is the largest of the parent's ``max_out`` and
        the parent's ``max_in`` entries of the *other* children, plus the
        child's own edge length.
        """
        for child_idx, child in enumerate(node.child_nodes()):
            elsewhere = [node.max_out] + [d for k, d in enumerate(node.max_in) if k != child_idx]
            child.max_out = max(elsewhere) + child.edge_length
            opt_function(child)

    def prepare_root(self):
        """Nothing to prepare at the root for this method."""
        pass

    def compute_threshold(self, k=3.5):
        """Warn that thresholds are unsupported for this class and return 0.

        ``filter_by_threshold`` looks this method up under the name
        ``compute_threshold``; it used to exist only under the misspelt name
        ``compute_threhold`` and was therefore never reached.
        """
        self.logger.warning("Trying to compute threshold for MPR_Tree, which is not supported.")
        return 0

    # original (misspelt) name kept for existing callers
    compute_threhold = compute_threshold

    def opt_score(self):
        """Return half of the best ``max_distance``."""
        return self.max_distance / 2

    def report_score(self):
        """Return ``"Tree height: <score>"``."""
        return "Tree height: " + str(self.opt_score())
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
########################################################################################################################
|
|
488
|
+
|
|
489
|
+
'''
|
|
490
|
+
logger = logging.getLogger("MinVar")
|
|
491
|
+
logger.setLevel(logging.INFO)
|
|
492
|
+
handler = logging.StreamHandler(sys.stdout)
|
|
493
|
+
formatter = logging.Formatter('%(levelname)s:%(name)s:%(message)s')
|
|
494
|
+
handler.setFormatter(formatter)
|
|
495
|
+
logger.addHandler(handler)
|
|
496
|
+
logger.propagate = False
|
|
497
|
+
'''
|
|
498
|
+
|
|
499
|
+
class minVAR_Base_Tree(Tree_extend):
    """Common machinery of the VAR-reroot variants (hence the name).

    Concrete subclasses only differ in ``Opt_function`` (how a candidate edge
    is scored) and in ``compute_threshold``.
    """

    def __init__(self, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=sys.stderr):
        """Wrap the tree, create the ``MinVar_Tree_<id>`` logger and reset the search state."""
        super(minVAR_Base_Tree, self).__init__(ddpTree, tree_file, schema)
        logger_name = "MinVar_Tree_" + str(logger_id)
        self.logger = new_logger(logger_name, myStream=logger_stream)
        self.reset()

    def reset(self):
        """Forget the best root found so far."""
        self.minVAR = None
        self.opt_root = self.ddpTree.root
        self.opt_x = 0

    def Node_init(self, node, nleaf=1, sum_in=0, sum_total=0, var=-1):
        """Attach the per-node fields ``sum_in``, ``sum_total``, ``nleaf`` and ``var``."""
        node.sum_in = sum_in
        node.sum_total = sum_total
        node.nleaf = nleaf
        node.var = var

    def Opt_function(self, node, a, b, c):
        """Abstract: score the edge above ``node`` given the quadratic ``a, b, c``."""
        self.logger.info("Abstract method! Should never be called")

    def compute_dRoot_VAR(self):
        """Store on the root the variance of its root-to-leaf distances.

        The sum and the sum of squares of the distances are accumulated over
        all leaves and combined as ``ssq / N - (sum / N) ** 2`` with
        ``N = root.nleaf``.
        """
        root = self.get_root()
        totals = {'ssq': 0, 'sum': 0}

        def walk(current, depth):
            if not current.is_leaf():
                for below in current.child_nodes():
                    walk(below, depth + below.edge_length)
                return
            totals['ssq'] += depth ** 2
            totals['sum'] += depth

        walk(root, 0)
        N = self.get_root().nleaf
        root_var = totals['ssq'] / N - (totals['sum'] / N) ** 2
        self.get_root().var = root_var

    def bUp_update(self, node):
        """Bottom-up step: leaf count and summed leaf distances below ``node``."""
        if node.is_leaf():
            node.nleaf = 1
            node.sum_in = 0
            return

        node.nleaf = 0
        node.sum_in = 0
        for kid in node.child_nodes():
            node.nleaf += kid.nleaf
            node.sum_in += kid.sum_in + kid.nleaf * kid.edge_length

    def Update_var(self, child, node, edge_length):
        """Set ``child.var`` from ``node.var`` and return the quadratic used.

        :return: ``(a, b, c)`` such that
            ``child.var = a * edge_length**2 + b * edge_length + c``.
        """
        n_total = self.total_leaves
        alpha = 2 * (node.sum_total - 2 * (child.sum_in + child.nleaf * edge_length)) / n_total
        beta = 1 - 2 * float(child.nleaf) / n_total
        a = 1 - beta * beta
        b = alpha - 2 * node.sum_total * beta / n_total
        c = node.var
        child.var = a * edge_length * edge_length + b * edge_length + c
        return a, b, c

    def tDown_update(self, node, opt_function):
        """Top-down step: derive each child's ``sum_total`` and ``var``, then score it."""
        for kid in node.child_nodes():
            kid.sum_total = node.sum_total + (self.total_leaves - 2 * kid.nleaf) * kid.edge_length
            coeffs = self.Update_var(kid, node, kid.edge_length)
            opt_function(kid, *coeffs)

    def prepare_root(self):
        """Initialise ``sum_total`` and ``var`` on the root and record the leaf count."""
        root = self.get_root()
        root.sum_total = root.sum_in
        self.compute_dRoot_VAR()
        self.total_leaves = root.nleaf

    def opt_score(self):
        """Return the best (minimum) variance found, or ``None`` if none was found."""
        return self.minVAR

    def report_score(self):
        """Return ``"MinVar score: <score>"``."""
        return "MinVar score: " + str(self.opt_score())
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
class MVDF_Tree(minVAR_Base_Tree):
    """VAR-reroot + deepest node + factorization."""

    def __init__(self, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=sys.stderr):
        """Same arguments as the base class; the logger options are forwarded to it."""
        super(MVDF_Tree, self).__init__(ddpTree, tree_file, schema, logger_id, logger_stream)
        self.deep_node = None

    def reset(self):
        """Forget the best root and the deepest node found so far."""
        super(MVDF_Tree, self).reset()
        self.deep_node = None

    def Opt_function(self, node, a, b, c):
        """Score the edge above ``node``.

        The quadratic ``a*x^2 + b*x + c`` is minimised at ``x = -b / (2a)``;
        the edge is considered only if ``x`` falls on it.  The minimum is
        divided by ``f * (1 - f)`` with ``f = node.nleaf / total_leaves``.
        The "deep node" is whichever of ``node`` and its parent has the
        smaller ``var``; a candidate replaces the current optimum when its
        deep node has a smaller ``var`` than the recorded one, or when it
        shares the recorded deep node and has a smaller score.
        """
        if a == 0:
            # degenerate quadratic (no interior minimum); previously this
            # raised ZeroDivisionError
            return
        x = -b / (2 * a)
        if not (x >= 0 and x <= node.edge_length):
            return

        factor = float(node.nleaf) / self.total_leaves
        factor = factor * (1 - factor)
        curr_minVAR = (a * x * x + b * x + c) / factor

        # `.parent` is the attribute the rest of the file uses for the
        # parent link (see reroot_at_edge), not `parent_node`
        parent = node.parent
        deep_node = node if node.var < parent.var else parent

        updateNeed = False
        if (self.deep_node is None) or (deep_node.var < self.deep_node.var):
            self.deep_node = deep_node
            self.minVAR = curr_minVAR
            updateNeed = True
        elif (self.deep_node is deep_node) and (curr_minVAR < self.minVAR):
            self.minVAR = curr_minVAR
            updateNeed = True

        if updateNeed:
            self.opt_root = node
            self.opt_x = node.edge_length - x

    def compute_threshold(self, k=3.5):
        """Return ``mean + k * std`` of the leaf distances at the optimal root.

        Should be called only AFTER the MV root was found.
        """
        # imported locally: the module only had `math` in scope as a side
        # effect of `from numpy import *`, which NumPy 2 no longer provides
        import math

        mean = (self.opt_root.sum_total - self.opt_x *
                (self.total_leaves - 2 * self.opt_root.nleaf)) / self.total_leaves
        factor = float(self.opt_root.nleaf) / self.total_leaves
        factor = factor * (1 - factor)
        rootVar = self.minVAR * factor
        self.logger.info(mean)
        self.logger.info(rootVar)
        # rounding can leave a variance marginally below zero
        std = math.sqrt(max(rootVar, 0.0))
        return mean + k * std
|
|
627
|
+
|
|
628
|
+
|
|
629
|
+
class MVD0_Tree(minVAR_Base_Tree):
    """VAR-reroot + deepest node + no factorization."""

    def __init__(self, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=sys.stderr):
        """Same arguments as the base class; the logger options are forwarded to it."""
        super(MVD0_Tree, self).__init__(ddpTree, tree_file, schema, logger_id, logger_stream)
        self.deep_node = None

    def reset(self):
        """Forget the best root and the deepest node found so far."""
        super(MVD0_Tree, self).reset()
        self.deep_node = None

    def Opt_function(self, node, a, b, c):
        """Score the edge above ``node``.

        The quadratic ``a*x^2 + b*x + c`` is minimised at ``x = -b / (2a)``;
        the edge is considered only if ``x`` falls on it, and the value of the
        quadratic at ``x`` is the score.  The "deep node" is whichever of
        ``node`` and its parent has the smaller ``var``; a candidate replaces
        the current optimum when its deep node has a smaller ``var`` than the
        recorded one, or when it shares the recorded deep node and has a
        smaller score.
        """
        if a == 0:
            # degenerate quadratic (no interior minimum); previously this
            # raised ZeroDivisionError
            return
        x = -b / (2 * a)
        if not (x >= 0 and x <= node.edge_length):
            return

        curr_minVAR = a * x * x + b * x + c

        # `.parent` is the attribute the rest of the file uses for the
        # parent link (see reroot_at_edge), not `parent_node`
        parent = node.parent
        deep_node = node if node.var < parent.var else parent

        updateNeed = False
        if (self.deep_node is None) or (deep_node.var < self.deep_node.var):
            self.deep_node = deep_node
            self.minVAR = curr_minVAR
            updateNeed = True
        elif (self.deep_node is deep_node) and (curr_minVAR < self.minVAR):
            self.minVAR = curr_minVAR
            updateNeed = True

        if updateNeed:
            self.opt_root = node
            self.opt_x = node.edge_length - x

    def compute_threshold(self, k=3.5):
        """Return ``mean + k * std`` of the leaf distances at the optimal root.

        Should be called only AFTER the MV root was found.
        """
        # imported locally: the module only had `math` in scope as a side
        # effect of `from numpy import *`, which NumPy 2 no longer provides
        import math

        mean = (self.opt_root.sum_total - self.opt_x *
                (self.total_leaves - 2 * self.opt_root.nleaf)) / self.total_leaves
        self.logger.info(mean)
        self.logger.info(self.minVAR)
        # rounding can leave a variance marginally below zero
        std = math.sqrt(max(self.minVAR, 0.0))
        return mean + k * std
|
|
675
|
+
|
|
676
|
+
|
|
677
|
+
class MV0F_Tree(minVAR_Base_Tree):
    """VAR-reroot + no deepest node + factorization."""

    def Opt_function(self, node, a, b, c):
        """Score the edge above ``node`` and keep it if it is the best so far.

        The quadratic ``a*x^2 + b*x + c`` is minimised at ``x = -b / (2a)``;
        the edge is considered only if ``x`` falls on it.  The minimum is
        divided by ``f * (1 - f)`` with ``f = node.nleaf / total_leaves`` and
        compared with the smallest score recorded so far.
        """
        if a == 0:
            # degenerate quadratic (no interior minimum); previously this
            # raised ZeroDivisionError
            return
        x = -b / (2 * a)
        if not (x >= 0 and x <= node.edge_length):
            return

        factor = float(node.nleaf) / self.total_leaves
        factor = factor * (1 - factor)
        curr_minVAR = (a * x * x + b * x + c) / factor
        if self.minVAR is None or curr_minVAR < self.minVAR:
            self.minVAR = curr_minVAR
            self.opt_root = node
            self.opt_x = node.edge_length - x

    def compute_threshold(self, k=3.5):
        """Return ``mean + k * std`` of the leaf distances at the optimal root.

        Should be called only AFTER the MV root was found.
        """
        # imported locally: the module only had `math` in scope as a side
        # effect of `from numpy import *`, which NumPy 2 no longer provides
        import math

        mean = (self.opt_root.sum_total - self.opt_x *
                (self.total_leaves - 2 * self.opt_root.nleaf)) / self.total_leaves
        factor = float(self.opt_root.nleaf) / self.total_leaves
        factor = factor * (1 - factor)
        rootVar = self.minVAR * factor
        self.logger.info(mean)
        self.logger.info(rootVar)
        # rounding can leave a variance marginally below zero
        std = math.sqrt(max(rootVar, 0.0))
        return mean + k * std
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
class MV00_Tree(minVAR_Base_Tree):
    """Supportive class to implement VAR-reroot + no deepest node + no factorization."""

    def Opt_function(self, node, a, b, c):
        """Record the edge above *node* if the quadratic's minimum is the best so far.

        The quadratic ``a*x^2 + b*x + c`` is evaluated at its vertex
        ``x = -b / (2a)``; edges whose vertex lies outside
        ``[0, node.edge_length]`` are ignored.
        """
        x = -b / (2 * a)
        if not (0 <= x <= node.edge_length):
            return

        edge_var = a * x * x + b * x + c
        if self.minVAR is not None and not (edge_var < self.minVAR):
            return

        self.minVAR = edge_var
        self.opt_root = node
        self.opt_x = node.edge_length - x

    def compute_threshold(self, k=3.5):
        """Return ``mean + k * sqrt(self.minVAR)`` for the current optimum.

        Should be called only AFTER the MV root was found.

        :param k: multiplier applied to the standard deviation (default 3.5).
        """
        root = self.opt_root
        n = self.total_leaves
        mean = (root.sum_total - self.opt_x * (n - 2 * root.nleaf)) / n
        variance = self.minVAR
        self.logger.info(mean)
        self.logger.info(variance)
        return mean + k * math.sqrt(variance)
|
|
734
|
+
|
|
735
|
+
|
|
736
|
+
class MBR_Tree(Tree_extend):
    """Supportive class to implement midpoint balance root.

    Balance points (BPs) are located on the edges of the input tree, a reduced
    "balance tree" containing only those points is built, and the midpoint
    root of that reduced tree (via ``MPR_Tree``) is mapped back onto the
    original tree.
    """

    def __init__(self, ddpTree=None, tree_file=None, schema="newick"):
        super(MBR_Tree, self).__init__(ddpTree, tree_file, schema)

        self.BPs = []  # BPs : balance points, as (node, x, mean) tuples
        self.opt_root = self.ddpTree.root
        self.opt_x = 0

    def Node_init(self, node, nleaf=1, sum_in=0, sum_out=-1):
        """Initialise the per-node accumulators on *node*.

        The values are stored on the node (not on the tree object), matching
        how ``bUp_update`` / ``tDown_update`` read them and how
        ``RTT_Tree.Node_init`` behaves.
        """
        node.nleaf = nleaf
        node.sum_in = sum_in
        node.sum_out = sum_out

    def Opt_function(self, node):
        """Locate the balance point on the edge above *node*, if there is one.

        ``x`` is half the difference between the mean ``sum_out`` per outside
        leaf and the mean ``sum_in`` per inside leaf.  When ``x`` lies within
        ``[0, node.edge_length]`` the point is appended to ``self.BPs`` and
        stored on the node as ``node.x`` / ``node.mean``; otherwise both are
        set to ``None``.
        """
        nleaf = node.nleaf
        mean_in = node.sum_in / nleaf
        mean_out = node.sum_out / (self.total_leaves - nleaf)
        x = (mean_out - mean_in) / 2
        if 0 <= x <= node.edge_length:
            self.BPs.append((node, x, mean_in + x))
            node.x = x
            node.mean = mean_in + x
        else:
            node.x = None
            node.mean = None

    def bUp_update(self, node):
        """Post-order step: compute ``nleaf`` and ``sum_in`` of *node* from its children."""
        node.sum_in = 0
        if node.is_leaf():
            node.nleaf = 1
        else:
            node.nleaf = 0
            for child in node.child_nodes():
                node.nleaf += child.nleaf
                node.sum_in += child.sum_in + child.nleaf * child.edge_length

    def tDown_update(self, node, opt_function):
        """Pre-order step: derive ``sum_out`` of every child of *node*, then score it."""
        for child in node.child_nodes():
            child.sum_out = (node.sum_out + node.sum_in + child.edge_length *
                             (self.total_leaves - 2 * child.nleaf) - child.sum_in)
            opt_function(child)

    def prepare_root(self):
        """Seed the root before the top-down pass (requires ``bUp_update`` results)."""
        root = self.get_root()
        root.sum_out = 0
        self.total_leaves = root.nleaf
        # The root has no edge above it, hence no balance point.
        root.x = None
        root.mean = None

    def list_balance_points(self):
        """Compute all balance points and log one ``label<TAB>x<TAB>mean`` line per point."""
        self.Topdown_label()
        self.Bottomup_update()
        self.prepare_root()
        self.Topdown_update()

        for (node, x, mean) in self.BPs:
            self.logger.info(node.label + "\t" + str(x) + "\t" + str(mean))

    def build_balance_tree(self):
        """Build ``self.balance_tree``, a reduced copy of the tree holding the balance points.

        Every node of the copy gets ``type`` ("real", "bp" for a balance
        point, or "dm" for a dummy leaf) and ``BPbelow`` (whether a balance
        point exists on or below one of its child edges).
        """
        self.Topdown_label()  # keep this step for now for debugging purpose
        self.Bottomup_update()
        self.prepare_root()
        self.Topdown_update()

        self.balance_tree = self.ddpTree.extract_tree()

        # bottom up pruning
        for node in self.balance_tree.traverse_postorder():
            node.type = "real"
            node.BPbelow = False

            for ch in node.child_nodes():
                source = ch.extraction_source
                has_bp = source.x is not None

                if ch.BPbelow or has_bp:
                    node.BPbelow = True

                if ch.BPbelow and not has_bp:
                    # Nothing to insert on this edge; keep the clade as it is.
                    continue

                # Read the edge length before detaching ch from node.
                edgelen = ch.edge_length
                node.remove_child(ch)

                if not has_bp:
                    # No balance point on or below this edge: the whole clade
                    # under ch is dropped.
                    continue

                # Insert a node p at the balance point:
                #   node --(edgelen - x)--> p --(mean)--> dummy leaf
                # and, when there are balance points below ch, also
                #   p --(x)--> ch
                p = Node()
                dummy = Node()

                p.type = "bp"  # bp: balance-point
                p.ref_child = source  # link p to the original tree (for later use after finding midpoint)
                dummy.type = "dm"  # dm: dummy

                node.add_child(p)
                if ch.BPbelow:
                    p.add_child(ch)
                    ch.edge_length = source.x
                p.add_child(dummy)

                p.edge_length = edgelen - source.x
                dummy.edge_length = source.mean

        # topdown pruning: skip down through nodes that have a single child
        node = self.balance_tree.root
        nchild = len(node.child_nodes())
        while 0 < nchild < 2:
            temp = node
            node = node.child_nodes()[0]
            temp.remove_child(node)
            if node.type == "dm":
                # Never make a dummy leaf the root; stay on its parent.
                node = temp
                break
            nchild = len(node.child_nodes())

        self.balance_tree.root = node
        self.balance_tree.root.edge_length = None

    def find_root(self):
        """Find the midpoint root of the balance tree and map it back to the original tree.

        Sets ``self.opt_root`` / ``self.opt_x``.  When the midpoint root is a
        dummy node, only a message is logged and both attributes keep their
        previous values.
        """
        self.build_balance_tree()
        mptre = MPR_Tree(ddpTree=self.balance_tree)
        mptre.tree_as_newick()
        mptre.find_root()

        self.logger.info(mptre.opt_root.type)

        if mptre.opt_root.type == "bp":
            self.opt_root = mptre.opt_root.ref_child
            self.opt_x = mptre.opt_root.ref_child.x + mptre.opt_x
        elif mptre.opt_root.type == "dm":
            self.logger.info("Hmm... Is it possible that a dummy was found as the opt_root?")
        else:
            self.opt_root = mptre.opt_root.extraction_source
            self.opt_x = mptre.opt_x

        self.logger.info(self.opt_root.label)
        self.logger.info(self.opt_x)
|
|
927
|
+
|
|
928
|
+
|
|
929
|
+
########################################################################################################################
|
|
930
|
+
|
|
931
|
+
# Module-level logger used by the quadratic-programming helper below.
# It writes "LEVEL:name:message" lines to stdout and does not forward
# records to ancestor loggers.
logger = logging.getLogger("quadprog_solvers")
logger.propagate = False
logger.setLevel(logging.INFO)

formatter = logging.Formatter('%(levelname)s:%(name)s:%(message)s')
handler = logging.StreamHandler(stdout)
handler.setFormatter(formatter)
logger.addHandler(handler)
|
|
938
|
+
|
|
939
|
+
def cvxopt_solve_qp(P, q, G=None, h=None, A=None, b=None, maxIter=1000):
    """Solve a quadratic program with ``cvxopt.solvers.qp`` and return the solution vector.

    :param P: square numpy matrix of the quadratic term; it is symmetrized
        as ``(P + P.T) / 2`` before being passed to the solver.
    :param q: numpy vector of the linear term.
    :param G, h: optional inequality constraints, passed to the solver only
        when ``G`` is not ``None``.
    :param A, b: optional equality constraints, passed to the solver only
        when ``A`` is not ``None``.
    :param maxIter: value of the solver's ``maxiters`` option.
    :return: 1-D numpy array of length ``P.shape[1]`` holding ``sol['x']``.

    A warning is logged when the solver status does not contain "optimal";
    the (possibly non-optimal) solution is still returned.
    """
    P = .5 * (P + P.T)  # make sure P is symmetric

    # Constraints go in by keyword so that supplying A/b without G/h can
    # never slide the equality constraints into the G/h positions.
    constraints = {}
    if G is not None:
        constraints['G'] = cvxopt.matrix(G)
        constraints['h'] = cvxopt.matrix(h)
    if A is not None:
        constraints['A'] = cvxopt.matrix(A)
        constraints['b'] = cvxopt.matrix(b)

    sol = cvxopt.solvers.qp(cvxopt.matrix(P), cvxopt.matrix(q),
                            options={'show_progress': False, 'maxiters': maxIter},
                            **constraints)

    status = sol['status']
    if 'optimal' not in status:
        if "unknown" in status:
            logger.warning("Couldn't find optimal solution on one branch. Perhaps due to maximum iterations exceeded. Consider increasing the maximum iterations via -x.")
        else:
            logger.warning("Couldn't find optimal solution on one branch. Solution status: " + status)
    return numpy.array(sol['x']).reshape((P.shape[1],))
|
|
954
|
+
|
|
955
|
+
|
|
956
|
+
########################################################################################################################
|
|
957
|
+
|
|
958
|
+
# Numerical tolerance shared by RTT_Tree: used as a bound in the QP
# constraint vector and as the minimum improvement needed to accept a new
# best RTT score.
EPSILON = 1.0e-5
|
|
959
|
+
|
|
960
|
+
class RTT_Tree(Tree_extend):
    """Supportive base class to implement RTT-reroot (root-to-tip), hence the name.

    For every edge a small quadratic program in three unknowns
    (``x``: position on the edge, ``y``, ``mu``) is solved; the edge with the
    smallest objective value becomes the root.
    """

    def __init__(self, smplTimes, ddpTree=None, tree_file=None, schema="newick", logger_id=1, logger_stream=stderr,
                 maxIter=1000):
        """
        :param smplTimes: mapping from leaf label to sampling time.
        :param ddpTree, tree_file, schema: forwarded to ``Tree_extend``.
        :param logger_id: suffix of the logger name ("RTT_Tree_<id>").
        :param logger_stream: stream handed to ``new_logger``.
        :param maxIter: iteration cap forwarded to ``cvxopt_solve_qp``.
        """
        super(RTT_Tree, self).__init__(ddpTree, tree_file, schema)
        self.logger = new_logger("RTT_Tree_" + str(logger_id), myStream=logger_stream)
        self.smplTimes = smplTimes
        self.reset()
        self.maxIter = maxIter

    def reset(self):
        """Forget any previous optimum and recompute the earliest sampling time."""
        self.RTT = None
        self.opt_root = self.ddpTree.root
        self.opt_y = 0
        self.opt_x = 0
        self.opt_mu = 0
        self.tmin = min(self.smplTimes.values())

    def Node_init(self, node, nleaf=1, SDI=0, SD=0, ST=0, SDT=0, SSD=0):
        """Initialise the per-node accumulators on *node*."""
        node.SDI = SDI
        node.SD = SD
        node.nleaf = nleaf
        node.ST = ST
        node.SDT = SDT
        node.SSD = SSD

    def Opt_function(self, node, SST, deltaT, deltaD, SDT, SSD, ST, SD):
        """Solve the QP for the edge above *node* and keep it if it improves the score.

        The unknowns are ordered ``(x, y, mu)``.  The inequality rows encode
        ``x >= 0``, ``mu >= -EPSILON``, ``x <= node.edge_length`` and
        ``y <= tmin * mu``.  A new optimum is accepted only when it improves
        the current score by more than ``EPSILON``.
        """
        n = self.total_leaves
        a, b, c, d, e, f = n, SST, (-2 * deltaT), (2 * deltaD), (-2 * SDT), SSD
        k, m, r = 2 * (n - 2 * node.nleaf), -2 * ST, 2 * SD

        tmin = self.tmin

        # use quadprog to compute mu_star, y_star, and x_star
        P = array([[a, k / 2, c / 2.], [k / 2, n, m / 2], [c / 2, m / 2, b]])
        q = array([d / 2., r / 2, e / 2])
        G = array([[-1., 0., 0.], [0., 0., -1.], [1., 0., 0.], [0., 1., -tmin]])
        h = array([0., EPSILON, node.edge_length, 0]).reshape((4,))
        x_star, y_star, mu_star = cvxopt_solve_qp(P, q, G, h, maxIter=self.maxIter)[:3]

        curr_RTT = a * x_star * x_star + b * mu_star * mu_star + c * x_star * mu_star + d * x_star + e * mu_star + f + n * y_star * y_star + k * x_star * y_star + m * mu_star * y_star + r * y_star

        if self.RTT is None or (curr_RTT - self.RTT < -EPSILON):
            self.RTT = curr_RTT
            self.opt_root = node
            self.opt_x = node.edge_length - x_star
            self.opt_y = y_star
            self.opt_mu = mu_star

    def bUp_update(self, node):
        """Post-order step: compute ``nleaf``, ``SDI`` and ``ST`` of *node*.

        For a leaf, ``ST`` is its sampling time; for an internal node the
        three values are accumulated from the children.
        """
        if node.is_leaf():
            node.nleaf = 1
            node.SDI = 0
            node.ST = self.smplTimes[node.label]
        else:
            node.nleaf = 0
            node.SDI = 0
            node.ST = 0
            for child in node.child_nodes():
                node.nleaf += child.nleaf
                node.SDI += child.SDI + child.nleaf * child.edge_length
                node.ST += child.ST

    def Update_var(self, child, node, edge_length):
        """Assemble the arguments ``Opt_function`` needs for the edge *node* -> *child*."""
        SST = self.SST
        deltaT = self.ddpTree.root.ST - 2 * child.ST
        deltaD = -2 * child.nleaf * edge_length - 2 * child.SDI + node.SD
        SDT = node.SDT
        SSD = node.SSD
        ST = self.ST
        SD = node.SD
        return SST, deltaT, deltaD, SDT, SSD, ST, SD

    def tDown_update(self, node, opt_function):
        """Pre-order step: derive ``SD``, ``SDT``, ``SSD`` of each child, then score its edge."""
        for child in node.child_nodes():
            child.SD = node.SD + (self.total_leaves - 2 * child.nleaf) * child.edge_length
            child.SDT = node.SDT + child.edge_length * (self.ddpTree.root.ST - 2 * child.ST)
            child.SSD = node.SSD + (self.total_leaves - 4 * child.nleaf) * (child.edge_length ** 2) + 2 * (
                    node.SD - 2 * child.SDI) * child.edge_length
            SST, deltaT, deltaD, SDT, SSD, ST, SD = self.Update_var(child, node, child.edge_length)
            opt_function(child, SST, deltaT, deltaD, SDT, SSD, ST, SD)

    def prepare_root(self):
        """Seed the root's sums before the top-down pass (requires ``bUp_update`` results).

        ``root.SD``, ``root.SSD``, ``root.SDT`` and ``self.SST`` are rebuilt
        from scratch by walking the tree and accumulating over the leaves, so
        no value is copied from ``root.SDI`` beforehand.
        """
        root = self.get_root()
        self.total_leaves = root.nleaf
        self.ST = root.ST
        self.ddpTree.root.droot = 0
        self.ddpTree.root.troot = 0
        root.SD, root.SSD, root.SDT, self.SST = 0, 0, 0, 0
        for v in self.ddpTree.traverse_preorder():
            if not v.is_root():
                # must have defined edge lengths
                v.droot = v.parent.droot + v.edge_length
                if v.is_leaf():
                    root.SSD += (v.droot ** 2)
                    self.SST += (self.smplTimes[v.label] ** 2)
                    root.SD += v.droot
                    root.SDT += (v.droot * self.smplTimes[v.label])

    def opt_score(self):
        """Return the best (smallest) objective value found so far, or ``None``."""
        return self.RTT

    def return_values(self):
        """Return ``(RTT score, mu, t0)``: the score divided by the leaf count,
        ``opt_mu``, and ``opt_y / opt_mu``."""
        return (self.opt_score() / self.total_leaves), (self.opt_mu), (self.opt_y / self.opt_mu)

    def report_score(self):
        """Return the values of ``return_values`` formatted as ``RTT=..<TAB>mu=..<TAB>t0=..``."""
        score, mu, t0 = self.return_values()
        return "RTT=" + str(score) + "\tmu=" + str(mu) + "\tt0=" + str(t0)
|
|
1072
|
+
|
|
1073
|
+
|
|
1074
|
+
########################################################################################################################
|
|
1075
|
+
|
|
1076
|
+
|
|
1077
|
+
def main():
    """Command-line entry point: parse the arguments and reroot every input tree.

    Each line of the input is read as one tree, rerooted with the selected
    method, and written to the output file; the score reported by the tree
    object is logged.  Supplying outgroups forces the OG method and supplying
    sampling times forces the RTT method.  Invalid or incomplete option
    combinations are logged as errors and end the process with exit status 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', required=False, type=argparse.FileType('r'), default=stdin, help="Input File (default is STDIN)")
    parser.add_argument('-m', '--method', required=False, type=str, default="MV", help="Method (MP for midpoint, MV for minVAR, OG for outgroup, RTT for root-to-tip) (default is MV)")
    parser.add_argument('-g', '--outgroups', required=False, type=str, help="Specify the outgroups. If specifying a list of outgroups, put them between quotes (i.e. \"). Otherwise, specifying a file which containts all the outgroups. Can only be used with -m OG")
    parser.add_argument('-t', '--smplTimes', required=False, type=argparse.FileType('r'), help="The file containing the sampling times at leaves; to be used with -m RTT")
    parser.add_argument('-o', '--outfile', required=False, type=argparse.FileType('w'), default=stdout, help="Output File (default is STDOUT)")
    parser.add_argument('-s', '--schema', required=False, type=str, default="newick", help="Schema of your input treefile (default is newick)")
    parser.add_argument('-f', '--infofile', required=False, type=argparse.FileType('w'), default=None, help="Save all the logging to this file. Default: print to stderr")
    parser.add_argument("-v", "--version", action='version', version=PROGRAM_NAME + " " + PROGRAM_VERSION, help="Show FastRoot version and exit")
    parser.add_argument("-x", "--maxIter", required=False, type=int, default=None, help="Maximum number of iterations to run cvxopt")

    # print help message if no argument is given
    if len(argv) == 1:
        logger = new_logger(__name__)
        logger.info("Running " + PROGRAM_NAME + " version " + PROGRAM_VERSION)
        parser.print_help()
        exit(0)

    args = parser.parse_args()
    stream = args.infofile if args.infofile else stderr
    logger = new_logger(__name__, myStream=stream)
    logger.info("Running " + PROGRAM_NAME + " version " + PROGRAM_VERSION)
    new_argv = re.sub(' +', ' ', " ".join(argv).replace("\n", " "))
    logger.info(PROGRAM_NAME + " was called as follows: " + new_argv)

    METHOD2FUNC = {'MP': MPR_Tree, 'MV': MV00_Tree, 'OG': OGR_Tree, 'RTT': RTT_Tree}
    METHOD2DESC = {'MP': "Midpoint", 'MV': "MinVar", 'OG': "Outgroup", 'RTT': "Root-to-Tip"}
    method = args.method.upper()

    # reading outgroups
    if args.outgroups:
        if method != 'OG':
            method = 'OG'
            logger.warning("The rooting method is set to outgroup rooting (OG) due to the presence of outgroups")
        if os.path.exists(args.outgroups):
            # One outgroup per line; blank lines are not outgroup names.
            with open(args.outgroups, 'r') as og_file:
                OGs = [name for name in (line.strip() for line in og_file) if name]
        else:
            OGs = args.outgroups.split()
    else:
        OGs = None

    # reading sampling times
    if args.smplTimes:
        smplTimes = {}
        for line in args.smplTimes:
            fields = line.split()
            if not fields:
                # tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            sp, t = fields
            smplTimes[sp] = float(t)
        if method != 'RTT':
            method = 'RTT'
            logger.warning(
                "The rooting method is set to root-to-tip rooting (RTT) due to the presence of sampling times")

    if method == 'RTT' and args.smplTimes is None:
        logger.error("Need sampling times for root-to-tip rooting")
        exit(1)
    elif method == 'OG' and args.outgroups is None:
        logger.error("Need outgroups for outgroup rooting")
        exit(1)

    # Explicit check rather than `assert`, which is removed under `python -O`.
    if method not in METHOD2FUNC:
        logger.error("Invalid method! Valid options: MP for midpoint, MV for minVAR, OG for outgroups, RTT for root-to-tip")
        exit(1)
    logger.info("Rooting Method: " + METHOD2DESC[method] + " Rooting")

    maxIter = 0
    if method == 'RTT':
        if args.maxIter and args.maxIter < 1000:
            logger.warning(
                "Invalid number of maximum iterations (-x). Must be at least 1000. Set back to 1000 by default.")
        maxIter = max(1000, args.maxIter) if args.maxIter else 1000
        logger.info("Maximum iterations: " + str(maxIter))
    elif args.maxIter is not None:
        logger.warning("The maximum number of iterations (-x) is only used with root-to-tip rooting (RTT)")

    # read and root each tree
    for i, line in enumerate(args.input):
        tree = read_tree(line, schema=args.schema.lower())
        if method == 'OG':
            a_tree = OGR_Tree(OGs, ddpTree=tree, logger_id=i + 1, logger_stream=stream)
        elif method == 'RTT':
            a_tree = RTT_Tree(smplTimes, ddpTree=tree, logger_id=i + 1, logger_stream=stream, maxIter=maxIter)
        else:
            a_tree = METHOD2FUNC[method](ddpTree=tree, logger_id=i + 1, logger_stream=stream)

        a_tree.Reroot()
        logger.info("Tree " + str(i + 1) + " " + a_tree.report_score())
        a_tree.tree_as_newick(outstream=args.outfile)
|
|
1166
|
+
|
|
1167
|
+
|
|
1168
|
+
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
|
|
1170
|
+
|
|
1171
|
+
|
|
1172
|
+
'''
|
|
1173
|
+
|
|
1174
|
+
|
|
1175
|
+
'''
|