treesak 1.53.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- TreeSAK/ALE.py +63 -0
- TreeSAK/ALE1.py +268 -0
- TreeSAK/ALE2.py +168 -0
- TreeSAK/ALE2RTC.py +30 -0
- TreeSAK/ALE3.py +205 -0
- TreeSAK/ALE4.py +636 -0
- TreeSAK/ALE5.py +210 -0
- TreeSAK/ALE6.py +401 -0
- TreeSAK/ALE7.py +126 -0
- TreeSAK/ALE_backup.py +1081 -0
- TreeSAK/AssessCVG.py +128 -0
- TreeSAK/AssessMarker.py +306 -0
- TreeSAK/AssessMarkerDeltaLL.py +257 -0
- TreeSAK/AssessMarkerPA.py +317 -0
- TreeSAK/AssessPB.py +113 -0
- TreeSAK/BMGE.jar +0 -0
- TreeSAK/BMGE.py +49 -0
- TreeSAK/C60SR4.nex +127 -0
- TreeSAK/CompareMCMC.py +138 -0
- TreeSAK/ConcateMSA.py +111 -0
- TreeSAK/ConvertMSA.py +135 -0
- TreeSAK/Dir.rb +82 -0
- TreeSAK/ExtractMarkerSeq.py +263 -0
- TreeSAK/FastRoot.py +1175 -0
- TreeSAK/FastRoot_backup.py +1122 -0
- TreeSAK/FigTree.py +34 -0
- TreeSAK/GTDB_tree.py +76 -0
- TreeSAK/GeneTree.py +142 -0
- TreeSAK/KEGG_Luo17.py +807 -0
- TreeSAK/LcaToLeaves.py +66 -0
- TreeSAK/MarkerRef2Tree.py +616 -0
- TreeSAK/MarkerRef2Tree_backup.py +628 -0
- TreeSAK/MarkerSeq2Tree.py +299 -0
- TreeSAK/MarkerSeq2Tree_backup.py +259 -0
- TreeSAK/ModifyTopo.py +116 -0
- TreeSAK/Newick_tree_plotter.py +79 -0
- TreeSAK/OMA.py +170 -0
- TreeSAK/OMA2.py +212 -0
- TreeSAK/OneLineAln.py +50 -0
- TreeSAK/PB.py +155 -0
- TreeSAK/PMSF.py +115 -0
- TreeSAK/PhyloBiAssoc.R +84 -0
- TreeSAK/PhyloBiAssoc.py +167 -0
- TreeSAK/PlotMCMC.py +41 -0
- TreeSAK/PlotMcmcNode.py +152 -0
- TreeSAK/PlotMcmcNode_old.py +252 -0
- TreeSAK/RootTree.py +101 -0
- TreeSAK/RootTreeGTDB.py +371 -0
- TreeSAK/RootTreeGTDB214.py +288 -0
- TreeSAK/RootTreeGTDB220.py +300 -0
- TreeSAK/SequentialDating.py +16 -0
- TreeSAK/SingleAleHGT.py +157 -0
- TreeSAK/SingleLinePhy.py +50 -0
- TreeSAK/SliceMSA.py +142 -0
- TreeSAK/SplitScore.py +21 -0
- TreeSAK/SplitScore1.py +177 -0
- TreeSAK/SplitScore1OMA.py +148 -0
- TreeSAK/SplitScore2.py +608 -0
- TreeSAK/TaxaCountStats.R +256 -0
- TreeSAK/TaxonTree.py +47 -0
- TreeSAK/TreeSAK_config.py +32 -0
- TreeSAK/VERSION +164 -0
- TreeSAK/VisHPD95.R +45 -0
- TreeSAK/VisHPD95.py +200 -0
- TreeSAK/__init__.py +0 -0
- TreeSAK/ale_parser.py +74 -0
- TreeSAK/ale_splitter.py +63 -0
- TreeSAK/alignment_pruner.pl +1471 -0
- TreeSAK/assessOG.py +45 -0
- TreeSAK/batch_itol.py +171 -0
- TreeSAK/catfasta2phy.py +140 -0
- TreeSAK/cogTree.py +185 -0
- TreeSAK/compare_trees.R +30 -0
- TreeSAK/compare_trees.py +255 -0
- TreeSAK/dating.py +264 -0
- TreeSAK/dating_ss.py +361 -0
- TreeSAK/deltall.py +82 -0
- TreeSAK/do_rrtc.rb +464 -0
- TreeSAK/fa2phy.py +42 -0
- TreeSAK/filter_rename_ar53.py +118 -0
- TreeSAK/format_leaf_name.py +70 -0
- TreeSAK/gap_stats.py +38 -0
- TreeSAK/get_SCG_tree.py +742 -0
- TreeSAK/get_arCOG_seq.py +97 -0
- TreeSAK/global_functions.py +222 -0
- TreeSAK/gnm_leaves.py +43 -0
- TreeSAK/iTOL.py +791 -0
- TreeSAK/iTOL_gene_tree.py +80 -0
- TreeSAK/itol_msa_stats.py +56 -0
- TreeSAK/keep_highest_rrtc.py +37 -0
- TreeSAK/koTree.py +194 -0
- TreeSAK/label_gene_tree_by_gnm.py +34 -0
- TreeSAK/label_tree.R +75 -0
- TreeSAK/label_tree.py +121 -0
- TreeSAK/mad.py +708 -0
- TreeSAK/mcmc2tree.py +58 -0
- TreeSAK/mcmcTC copy.py +92 -0
- TreeSAK/mcmcTC.py +104 -0
- TreeSAK/mcmctree_vs_reltime.R +44 -0
- TreeSAK/mcmctree_vs_reltime.py +252 -0
- TreeSAK/merge_pdf.py +32 -0
- TreeSAK/pRTC.py +56 -0
- TreeSAK/parse_mcmctree.py +198 -0
- TreeSAK/parse_reltime.py +141 -0
- TreeSAK/phy2fa.py +37 -0
- TreeSAK/plot_distruibution_th.py +165 -0
- TreeSAK/prep_mcmctree_ctl.py +92 -0
- TreeSAK/print_leaves.py +32 -0
- TreeSAK/pruneMSA.py +63 -0
- TreeSAK/recode.py +73 -0
- TreeSAK/remove_bias.R +112 -0
- TreeSAK/rename_leaves.py +78 -0
- TreeSAK/replace_clade.py +55 -0
- TreeSAK/root_with_out_group.py +84 -0
- TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
- TreeSAK/subsample_drep_gnms.py +74 -0
- TreeSAK/subset.py +69 -0
- TreeSAK/subset_tree_stupid_old_way.py +193 -0
- TreeSAK/supertree.py +330 -0
- TreeSAK/tmp_1.py +19 -0
- TreeSAK/tmp_2.py +19 -0
- TreeSAK/tmp_3.py +120 -0
- TreeSAK/tmp_4.py +43 -0
- TreeSAK/tmp_5.py +12 -0
- TreeSAK/weighted_rand.rb +23 -0
- treesak-1.53.3.data/scripts/TreeSAK +955 -0
- treesak-1.53.3.dist-info/LICENSE +674 -0
- treesak-1.53.3.dist-info/METADATA +27 -0
- treesak-1.53.3.dist-info/RECORD +131 -0
- treesak-1.53.3.dist-info/WHEEL +5 -0
- treesak-1.53.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import glob
|
|
3
|
+
import argparse
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from ete3 import Tree
|
|
6
|
+
import plotly.express as px
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Usage text for the PlotMcmcNode command: example command lines, the naming
# rule for the paired *_mcmc.txt / *_out.txt files, and the layouts of the
# node file (-n) and the y-axis label file (-l).
PlotMcmcNode_usage = '''
============================ PlotMcmcNode example commands ============================

TreeSAK PlotMcmcNode -i McmcTree_op_files -n n179 -o Clock2_n179.pdf
TreeSAK PlotMcmcNode -i McmcTree_op_files -n n161,n186 -o Clock3_n161_n186.pdf
TreeSAK PlotMcmcNode -i McmcTree_op_files -n nodes.txt -o multi_runs_multi_nodes.pdf

# File name of the mcmc.txt and the corresponding mcmc out file need to follow
# the rule as specified below:
[setting_1]_mcmc.txt
[setting_1]_out.txt
[setting_2]_mcmc.txt
[setting_2]_out.txt

# file format (-n, tab separated)
# leave the 2nd column blank for nodes without renaming
setting_1 node1 Bacteria
setting_2 node2
setting_3 node3,node9 Archaea

# Y-axis label file format (-l, tab separated)
PA_75_DeltaLL_50_clock3_mcmc.txt DeltaLL_50
PA_75_DeltaLL_75_clock3_mcmc.txt DeltaLL_75

=======================================================================================
'''
|
|
35
|
+
|
|
36
|
+
def sep_path_basename_ext(file_in):
    """Split a file path into its directory, base name and extension.

    Returns a (directory, base name, extension) tuple. The directory is
    reported as '.' when file_in carries no directory part, and the extension
    keeps its leading dot (it is whatever os.path.splitext returns).
    """
    folder, name_with_ext = os.path.split(file_in)
    stem, suffix = os.path.splitext(name_with_ext)

    # a bare file name lives in the current directory
    folder = '.' if folder == '' else folder

    return folder, stem, suffix
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def mcmctree_out_to_tree_str(mamctree_out):
    """Extract the FigTree species tree from an MCMCTree out file and rename its nodes.

    The tree is taken from the line that directly follows the
    'Species tree for FigTree' header line; if the header occurs more than
    once, the tree after the last occurrence is used.

    mamctree_out: path to the MCMCTree out file.

    Returns the tree as a Newick string written with ete3 format 8, in which
    - every leaf name has lost its first '_'-separated field, and
    - every internal node name is prefixed with 't_n'.

    NOTE(review): if the header line is absent the tree string stays empty and
    the ete3 Tree constructor is expected to reject it - confirm the exception
    type before relying on it.
    """

    # Match on the stable prefix of the header only, so the lookup does not
    # depend on the exact spacing/wording of the rest of that line.
    header_marker = 'Species tree for FigTree'

    tree_str = ''
    tree_line_no = 0
    with open(mamctree_out) as out_handle:
        for line_no, each_line in enumerate(out_handle, 1):
            if header_marker in each_line:
                # the tree itself is on the line right after the header
                tree_line_no = line_no + 1
            elif line_no == tree_line_no:
                tree_str = each_line.strip()

    tree_str_no_space = tree_str.replace(' ', '')

    # rename tree nodes
    t = Tree(tree_str_no_space, format=1)
    for each_node in t.traverse():
        if each_node.is_leaf():
            each_node.name = '_'.join(each_node.name.split('_')[1:])
        else:
            each_node.name = 't_n%s' % each_node.name

    return t.write(format=8)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def plot_distribution(df_txt, output_plot):
    """Draw one horizontal half-violin per setting/node and save the figure.

    df_txt:      comma-separated table with the columns 'Value', 'Node' and
                 'Setting'.
    output_plot: file the figure is written to (via plotly's write_image).
    """
    data = pd.read_table(df_txt, sep=',')
    setting_count = len(data['Setting'].unique())
    node_count = len(data['Node'].unique())

    # rows are ordered by setting (descending) before plotting
    data = data.sort_values(by='Setting', ascending=False)

    # 100 px per setting, but never less than 360 px in total
    figure_height = max(setting_count * 100, 360)

    fig = px.violin(data, x="Value", y="Setting", color="Node", points=False,
                    orientation="h", width=900, height=figure_height)

    # a single node gets a filled violin, several nodes get transparent ones
    if node_count == 1:
        trace_style = dict(side="positive", fillcolor='lightblue', width=1.6, opacity=0.75)
    else:
        trace_style = dict(side="positive", fillcolor='rgba(0,0,0,0)', width=1.6)
    fig.update_traces(**trace_style)

    fig.update_traces(showlegend=True)
    fig.layout.template = "simple_white"
    fig.write_image(output_plot)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_internal_node_to_plot(node_txt, mo_file):
    """Work out which nodes to plot and how to label them.

    node_txt: either the path of an existing tab separated node file, or a
              comma separated string of node ids.
              In the file, column 1 is a node id, or a comma separated list of
              leaves whose last common ancestor is the node to plot; an
              optional column 2 is the name to show for that node.
    mo_file:  path of the MCMCTree out file the tree is read from; it may be
              missing as long as no line of the node file lists leaves.

    Returns (node_set, node_rename_dict, tree_str):
        node_set          set of node ids when node_txt is a file, otherwise
                          the list obtained by splitting node_txt on ','
        node_rename_dict  node id -> display name (only filled from a file)
        tree_str          renamed tree string, '' if mo_file does not exist

    Prints a message and exits if leaves are given but no tree is available.
    """

    tree_str = ''
    if os.path.isfile(mo_file):
        tree_str = mcmctree_out_to_tree_str(mo_file)

    # node ids given directly on the command line
    if not os.path.isfile(node_txt):
        return node_txt.split(','), dict(), tree_str

    node_set = set()
    node_rename_dict = dict()
    parsed_tree = None  # built on first use and shared by all lines of the file
    with open(node_txt) as node_handle:
        for each in node_handle:
            each_split = each.strip().split('\t')
            node_str = each_split[0]

            # get internal_node_to_plot
            if ',' not in node_str:
                internal_node_to_plot = node_str
            else:
                if tree_str == '':
                    print('*out.txt file not found, program exited!')
                    exit()
                if parsed_tree is None:
                    parsed_tree = Tree(tree_str, format=1)
                leaf_list = node_str.split(',')
                internal_node_to_plot = parsed_tree.get_common_ancestor(leaf_list).name

            # add internal_node_to_plot to node_set
            if internal_node_to_plot != '':
                node_set.add(internal_node_to_plot)

            # read in name to show in plot
            if len(each_split) == 2 and each_split[1] != '':
                node_rename_dict[internal_node_to_plot] = each_split[1]

    return node_set, node_rename_dict, tree_str
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def PlotMcmcNode(args):
    """Plot the value distributions of selected nodes from MCMCTree mcmc.txt files.

    args is a dict with the keys:
        'i'    an mcmc.txt file, or a folder that is searched for *_mcmc.txt
        'n'    nodes to plot: comma separated node ids or a node file
               (parsed by get_internal_node_to_plot)
        'o'    output plot file
        'of'   one *out.txt file used for every run; if None, each
               [setting]_mcmc.txt is paired with [setting]_out.txt, and all of
               these files must exist
        'l'    optional tab separated two-column file mapping an mcmc file
               name to the label shown on the y axis
        'tmp'  if False, the intermediate files are removed after plotting

    Three intermediate files named [output base name]_tree.txt, _data.txt and
    _label.txt are written to the current working directory.

    Prints a message and exits when no mcmc.txt file is found, an expected
    *out.txt file is missing, the label file is malformed, or none of the
    requested nodes is a column of any mcmc.txt file.
    """

    mcmc_in = args['i']
    node_txt = args['n']
    output_plot = args['o']
    specified_out_file = args['of']
    y_label_txt = args['l']
    keep_tmp_file = args['tmp']

    # check MCMCTree output file/dir
    if os.path.isfile(mcmc_in) is True:
        mcmc_file_list = [mcmc_in]
    else:
        mcmc_file_list = glob.glob('%s/*_mcmc.txt' % mcmc_in)

    if len(mcmc_file_list) == 0:
        print('*mcmc.txt file not found, program exited!')
        exit()

    # without an explicit out file, every mcmc.txt needs its own *_out.txt
    if specified_out_file is None:
        missed_out_file_list = []
        for each_mcmc_file in mcmc_file_list:
            pwd_out_file = each_mcmc_file.replace('_mcmc.txt', '_out.txt')
            if os.path.isfile(pwd_out_file) is False:
                missed_out_file_list.append(pwd_out_file)
        if len(missed_out_file_list) > 0:
            print('The following *out.txt files are missing, program exited!')
            print('\n'.join(sorted(missed_out_file_list)))
            exit()

    # read in y-axis label file
    y_label_dict = dict()
    if y_label_txt is not None:
        with open(y_label_txt) as y_label_handle:
            for each_sample in y_label_handle:
                each_sample_split = each_sample.strip().split('\t')
                if len(each_sample_split) == 2:
                    y_label_dict[each_sample_split[0]] = each_sample_split[1]
                else:
                    print('Format error: %s' % y_label_txt)
                    exit()

    _, f_base, _ = sep_path_basename_ext(output_plot)
    found_matched_node = False
    op_tree_tmp = '%s_tree.txt' % f_base
    op_df_tmp = '%s_data.txt' % f_base
    op_label_tmp = '%s_label.txt' % f_base

    # the context manager closes all three files even if a run fails half-way
    with open(op_label_tmp, 'w') as op_label_tmp_handle, \
            open(op_tree_tmp, 'w') as op_tree_tmp_handle, \
            open(op_df_tmp, 'w') as op_df_tmp_handle:

        op_df_tmp_handle.write('Value,Node,Setting\n')
        for mcmc_file in mcmc_file_list:

            mcmc_file_no_path = os.path.basename(mcmc_file)

            if specified_out_file is None:
                pwd_current_run_mcmc_out = mcmc_file.replace('_mcmc.txt', '_out.txt')
            else:
                pwd_current_run_mcmc_out = specified_out_file

            node_set, node_rename_dict, tree_str = get_internal_node_to_plot(node_txt, pwd_current_run_mcmc_out)
            op_tree_tmp_handle.write('%s\t%s\n' % (mcmc_file_no_path.replace('_mcmc.txt', ''), tree_str))

            # fall back to the file name when no y-axis label was provided
            label_to_write = y_label_dict.get(mcmc_file_no_path, mcmc_file_no_path)

            mcmc_df = pd.read_table(mcmc_file, index_col=0)
            for each_col in mcmc_df:
                if each_col not in node_set:
                    continue

                node_name_to_write = node_rename_dict.get(each_col, each_col)
                found_matched_node = True
                for each_value in mcmc_df[each_col].values:
                    op_df_tmp_handle.write('%s,%s,%s\n' % (each_value, node_name_to_write, label_to_write))

                op_label_tmp_handle.write('%s\t%s\t%s\n' % (label_to_write, each_col, node_name_to_write))

    if found_matched_node is False:
        print('Provided node(s) not found, program exited!')
        exit()

    # plot distribution
    plot_distribution(op_df_tmp, output_plot)

    # remove tmp files (os.remove instead of a shell 'rm': safe for file names
    # with spaces or shell metacharacters, and portable)
    if keep_tmp_file is False:
        for each_tmp_file in (op_tree_tmp, op_df_tmp, op_label_tmp):
            if os.path.isfile(each_tmp_file):
                os.remove(each_tmp_file)

    print('Plot exported to %s, done!' % output_plot)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
if __name__ == '__main__':

    # stand-alone entry point: the options below are registered in the order
    # given, then the parsed values are handed to PlotMcmcNode as a plain dict
    option_specs = [
        ('-i', dict(required=True, help='folder holds the *mcmc.txt and *out.txt files')),
        ('-of', dict(required=False, default=None, help='the *out.txt file')),
        ('-n', dict(required=True, help='Nodes to plot')),
        ('-l', dict(required=False, default=None, help='labels on y axis')),
        ('-o', dict(required=True, help='Output plot')),
        ('-tmp', dict(required=False, action="store_true", help='keep tmp files')),
    ]

    PlotMcmcNode_parser = argparse.ArgumentParser()
    for option_flag, option_kwargs in option_specs:
        PlotMcmcNode_parser.add_argument(option_flag, **option_kwargs)

    args = vars(PlotMcmcNode_parser.parse_args())
    PlotMcmcNode(args)
|
TreeSAK/RootTree.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import random
|
|
2
|
+
import dendropy
|
|
3
|
+
import argparse
|
|
4
|
+
from ete3 import Tree
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Usage text for the RootTree command (one example command line).
RootTree_usage = '''
====================== RootTree example commands ======================

TreeSAK RootTree -i input.tree -og outgroup_genomes.txt -o rooted.tree

=======================================================================
'''
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def root_with_outgroup(input_tree, out_group_list, add_root_branch, tree_file_rooted):
    """Reroot a Newick tree on the given outgroup and write the result to a file.

    Modified based on:
    https://github.com/Ecogenomics/GTDBTk/blob/master/gtdbtk/reroot_tree.py

    input_tree:       file containing the Newick tree to be rerooted.
    out_group_list:   labels of the outgroup taxa (any container supporting `in`).
    add_root_branch:  if True, the written tree is wrapped in one more pair of
                      parentheses so that the root gets a branch of length 0.02.
    tree_file_rooted: name of the file the rerooted tree is written to.

    Raises ValueError if none of the outgroup labels is a leaf of the tree, or
    if every leaf belongs to the outgroup.
    """

    tree = dendropy.Tree.get_from_path(input_tree, schema='newick', rooting='force-rooted', preserve_underscores=True)

    outgroup_in_tree = set()
    ingroup_leaves = set()
    for n in tree.leaf_node_iter():
        if n.taxon.label in out_group_list:
            outgroup_in_tree.add(n.taxon)
        else:
            ingroup_leaves.add(n)

    # fail early with a clear message instead of a cryptic library error
    if not outgroup_in_tree:
        raise ValueError('None of the provided outgroup leaves was found in %s' % input_tree)
    if not ingroup_leaves:
        raise ValueError('All leaves in %s belong to the outgroup, nothing to root on' % input_tree)

    # Since finding the MRCA is a rooted tree operation, the tree is first rerooted on an ingroup taxa. This
    # ensures the MRCA of the outgroup can be identified so long as the outgroup is monophyletic. If the
    # outgroup is polyphyletic trying to root on it is ill-defined. To try and pick a "good" root for
    # polyphyletic outgroups, random ingroup taxa are selected until two of them give the same size
    # lineage. This will, likely, be the smallest bipartition possible for the given outgroup though
    # this is not guaranteed.

    # the set does not change inside the loop, so build the sampling pool once
    ingroup_pool = list(ingroup_leaves)

    mrca = tree.mrca(taxa=outgroup_in_tree)
    mrca_leaves = len(mrca.leaf_nodes())
    while True:
        rnd_ingroup = random.sample(ingroup_pool, 1)[0]
        tree.reroot_at_edge(rnd_ingroup.edge, length1=0.5 * rnd_ingroup.edge_length, length2=0.5 * rnd_ingroup.edge_length)
        mrca = tree.mrca(taxa=outgroup_in_tree)
        current_mrca_leaves = len(mrca.leaf_nodes())
        if current_mrca_leaves == mrca_leaves:
            break
        mrca_leaves = current_mrca_leaves

    # the final rerooting is skipped when the MRCA edge carries no length
    if mrca.edge_length is not None:
        tree.reroot_at_edge(mrca.edge, length1=0.5 * mrca.edge_length, length2=0.5 * mrca.edge_length)

    tree_out_string = tree.as_string(schema='newick', suppress_rooting=True, unquoted_underscores=True)
    tree_out_string = tree_out_string.replace("'", "")

    # add the root bar
    if add_root_branch is True:
        tree_out_string = '(' + tree_out_string
        tree_out_string = tree_out_string.replace(');', '):0.02);')

    # write out tree string
    with open(tree_file_rooted, 'w') as tree_file_rooted_handle:
        tree_file_rooted_handle.write(tree_out_string)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def RootTree(args):
    """Root a tree with the outgroup leaves listed in a text file.

    args is a dict with the keys:
        'i'         input tree file
        'og'        text file with one outgroup leaf name per line
        'o'         output file for the rooted tree
        'add_root'  if True, a root branch is added to the written tree

    A 'fmt' entry may be present in args but is not used by this function.
    """

    tree_file = args['i']
    out_group_txt = args['og']
    tree_file_rooted = args['o']
    add_root_branch = args['add_root']

    # read the outgroup leaves; blank lines are skipped so that an empty
    # string never ends up in the outgroup
    out_group_set = set()
    with open(out_group_txt) as out_group_handle:
        for each_og in out_group_handle:
            og_name = each_og.strip()
            if og_name != '':
                out_group_set.add(og_name)

    root_with_outgroup(tree_file, out_group_set, add_root_branch, tree_file_rooted)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
if __name__ == '__main__':

    # stand-alone entry point: the options below are registered in the order
    # given, then the parsed values are handed to RootTree as a plain dict
    option_specs = [
        ('-i', dict(required=True, help='input tree')),
        ('-og', dict(required=True, help='out group leaves')),
        ('-o', dict(required=True, help='output tree')),
        ('-add_root', dict(required=False, action='store_true', help='add the root branch')),
        ('-fmt', dict(required=False, default=1, type=int, help='tree format, default: 1')),
    ]

    RootTree_parser = argparse.ArgumentParser()
    for option_flag, option_kwargs in option_specs:
        RootTree_parser.add_argument(option_flag, **option_kwargs)

    args = vars(RootTree_parser.parse_args())
    RootTree(args)
|