treesak 1.51.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of treesak might be problematic. Click here for more details.
- TreeSAK/ALE.py +63 -0
- TreeSAK/ALE1.py +268 -0
- TreeSAK/ALE2.py +168 -0
- TreeSAK/ALE2RTC.py +30 -0
- TreeSAK/ALE3.py +205 -0
- TreeSAK/ALE4.py +636 -0
- TreeSAK/ALE5.py +210 -0
- TreeSAK/ALE6.py +401 -0
- TreeSAK/ALE7.py +126 -0
- TreeSAK/ALE_backup.py +1081 -0
- TreeSAK/AssessCVG.py +128 -0
- TreeSAK/AssessMarker.py +306 -0
- TreeSAK/AssessMarkerDeltaLL.py +257 -0
- TreeSAK/AssessMarkerPA.py +317 -0
- TreeSAK/AssessPB.py +130 -0
- TreeSAK/BMGE.jar +0 -0
- TreeSAK/BMGE.py +49 -0
- TreeSAK/CompareMCMC.py +138 -0
- TreeSAK/ConcateMSA.py +111 -0
- TreeSAK/ConvertMSA.py +135 -0
- TreeSAK/Dir.rb +82 -0
- TreeSAK/ExtractMarkerSeq.py +263 -0
- TreeSAK/FastRoot.py +1175 -0
- TreeSAK/FastRoot_backup.py +1122 -0
- TreeSAK/FigTree.py +34 -0
- TreeSAK/GTDB_tree.py +76 -0
- TreeSAK/GeneTree.py +142 -0
- TreeSAK/KEGG_Luo17.py +807 -0
- TreeSAK/LcaToLeaves.py +66 -0
- TreeSAK/MarkerRef2Tree.py +616 -0
- TreeSAK/MarkerRef2Tree_backup.py +628 -0
- TreeSAK/MarkerSeq2Tree.py +290 -0
- TreeSAK/MarkerSeq2Tree_backup.py +259 -0
- TreeSAK/ModifyTopo.py +116 -0
- TreeSAK/Newick_tree_plotter.py +79 -0
- TreeSAK/OMA.py +170 -0
- TreeSAK/OMA2.py +212 -0
- TreeSAK/OneLineAln.py +50 -0
- TreeSAK/PB.py +155 -0
- TreeSAK/PMSF.py +106 -0
- TreeSAK/PhyloBiAssoc.R +84 -0
- TreeSAK/PhyloBiAssoc.py +167 -0
- TreeSAK/PlotMCMC.py +41 -0
- TreeSAK/PlotMcmcNode.py +152 -0
- TreeSAK/PlotMcmcNode_old.py +252 -0
- TreeSAK/RootTree.py +101 -0
- TreeSAK/RootTreeGTDB214.py +288 -0
- TreeSAK/RootTreeGTDB220.py +300 -0
- TreeSAK/RootTreeGTDB226.py +300 -0
- TreeSAK/SequentialDating.py +16 -0
- TreeSAK/SingleAleHGT.py +157 -0
- TreeSAK/SingleLinePhy.py +50 -0
- TreeSAK/SliceMSA.py +142 -0
- TreeSAK/SplitScore.py +19 -0
- TreeSAK/SplitScore1.py +178 -0
- TreeSAK/SplitScore1OMA.py +148 -0
- TreeSAK/SplitScore2.py +597 -0
- TreeSAK/TaxaCountStats.R +256 -0
- TreeSAK/TaxonTree.py +47 -0
- TreeSAK/TreeSAK_config.py +32 -0
- TreeSAK/VERSION +158 -0
- TreeSAK/VisHPD95.R +45 -0
- TreeSAK/VisHPD95.py +200 -0
- TreeSAK/__init__.py +0 -0
- TreeSAK/ale_parser.py +74 -0
- TreeSAK/ale_splitter.py +63 -0
- TreeSAK/alignment_pruner.pl +1471 -0
- TreeSAK/assessOG.py +45 -0
- TreeSAK/catfasta2phy.py +140 -0
- TreeSAK/cogTree.py +185 -0
- TreeSAK/compare_trees.R +30 -0
- TreeSAK/compare_trees.py +255 -0
- TreeSAK/dating.py +264 -0
- TreeSAK/dating_ss.py +361 -0
- TreeSAK/deltall.py +82 -0
- TreeSAK/do_rrtc.rb +464 -0
- TreeSAK/fa2phy.py +42 -0
- TreeSAK/format_leaf_name.py +70 -0
- TreeSAK/gap_stats.py +38 -0
- TreeSAK/get_SCG_tree.py +742 -0
- TreeSAK/get_arCOG_seq.py +97 -0
- TreeSAK/global_functions.py +222 -0
- TreeSAK/gnm_leaves.py +43 -0
- TreeSAK/iTOL.py +791 -0
- TreeSAK/iTOL_gene_tree.py +80 -0
- TreeSAK/itol_msa_stats.py +56 -0
- TreeSAK/keep_highest_rrtc.py +37 -0
- TreeSAK/koTree.py +194 -0
- TreeSAK/label_tree.R +75 -0
- TreeSAK/label_tree.py +121 -0
- TreeSAK/mad.py +708 -0
- TreeSAK/mcmc2tree.py +58 -0
- TreeSAK/mcmcTC copy.py +92 -0
- TreeSAK/mcmcTC.py +104 -0
- TreeSAK/mcmctree_vs_reltime.R +44 -0
- TreeSAK/mcmctree_vs_reltime.py +252 -0
- TreeSAK/merge_pdf.py +32 -0
- TreeSAK/pRTC.py +56 -0
- TreeSAK/parse_mcmctree.py +198 -0
- TreeSAK/parse_reltime.py +141 -0
- TreeSAK/phy2fa.py +37 -0
- TreeSAK/plot_distruibution_th.py +165 -0
- TreeSAK/prep_mcmctree_ctl.py +92 -0
- TreeSAK/print_leaves.py +32 -0
- TreeSAK/pruneMSA.py +63 -0
- TreeSAK/recode.py +73 -0
- TreeSAK/remove_bias.R +112 -0
- TreeSAK/rename_leaves.py +77 -0
- TreeSAK/replace_clade.py +55 -0
- TreeSAK/root_with_out_group.py +84 -0
- TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
- TreeSAK/subsample_drep_gnms.py +74 -0
- TreeSAK/subset.py +69 -0
- TreeSAK/subset_tree_stupid_old_way.py +193 -0
- TreeSAK/supertree.py +330 -0
- TreeSAK/tmp_1.py +19 -0
- TreeSAK/tmp_2.py +19 -0
- TreeSAK/tmp_3.py +120 -0
- TreeSAK/weighted_rand.rb +23 -0
- treesak-1.51.2.data/scripts/TreeSAK +950 -0
- treesak-1.51.2.dist-info/LICENSE +674 -0
- treesak-1.51.2.dist-info/METADATA +27 -0
- treesak-1.51.2.dist-info/RECORD +125 -0
- treesak-1.51.2.dist-info/WHEEL +5 -0
- treesak-1.51.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import glob
|
|
3
|
+
import argparse
|
|
4
|
+
from ete3 import Tree
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
VisHPD95_usage = '''
|
|
8
|
+
============================ VisHPD95 example command ============================
|
|
9
|
+
|
|
10
|
+
TreeSAK VisHPD95 -i mcmc_out -o HPD95.pdf -n nodes.txt -label label.txt
|
|
11
|
+
TreeSAK VisHPD95 -i mcmc_out -o HPD95.pdf -n nodes.txt -label label.txt -x 9 -y 6
|
|
12
|
+
|
|
13
|
+
# Example data
|
|
14
|
+
https://github.com/songweizhi/TreeSAK/tree/master/example_data/VisHPD95
|
|
15
|
+
|
|
16
|
+
==================================================================================
|
|
17
|
+
'''
|
|
18
|
+
|
|
19
|
+
def mcmctree_out_to_tree_str(mamctree_out):
    """Return the FigTree species tree from an MCMCTree output file with renamed nodes.

    The tree is the line that directly follows the 'Species tree for FigTree'
    marker line (the last such pair wins if the marker occurs more than once).

    Args:
        mamctree_out: path to the MCMCTree output text file.

    Returns:
        Newick string written with ete3 ``format=8``. Leaf names lose everything
        up to and including their first underscore; internal nodes are named
        't_n<original name>'.
    """
    marker = 'Species tree for FigTree. Branch lengths = posterior mean times; 95% CIs = labels'

    # get tree string from mamctree_out
    tree_str = ''
    expect_tree = False
    with open(mamctree_out) as mcmctree_out_handle:  # closed even if reading fails
        for each_line in mcmctree_out_handle:
            # compare on whitespace-normalised text so runs of spaces in the marker line do not matter
            if marker in ' '.join(each_line.split()):
                expect_tree = True
            elif expect_tree:
                tree_str = each_line.strip()
                expect_tree = False

    tree_str_no_space = tree_str.replace(' ', '')

    # rename tree nodes
    t = Tree(tree_str_no_space, format=1)
    for each_node in t.traverse():
        if each_node.is_leaf():
            # drop the prefix before the first underscore
            each_node.name = '_'.join(each_node.name.split('_')[1:])
        else:
            each_node.name = 't_n%s' % each_node.name

    return t.write(format=8)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def get_internal_node_to_plot(node_txt, mo_file):
    """Work out which internal nodes to plot and how to label them.

    Args:
        node_txt: either a path to a tab-separated file or, when no such file
            exists, a comma-separated string of node ids. In the file, column 1
            is a node id or a comma-separated list of leaves whose common
            ancestor is wanted; an optional column 2 is the name to show in the plot.
        mo_file: MCMCTree output file; only read when it exists.

    Returns:
        (node_set, node_rename_dict, tree_str). node_set is a set when node_txt
        is a file and a list (user order kept) when it is an inline string.
        tree_str is '' when mo_file does not exist.

    Raises:
        SystemExit: a line lists leaves but no tree could be read from mo_file.
    """
    tree_str = ''
    if os.path.isfile(mo_file):
        tree_str = mcmctree_out_to_tree_str(mo_file)

    # nodes given inline on the command line
    if not os.path.isfile(node_txt):
        return node_txt.split(','), dict(), tree_str

    # get nodes to plot
    node_set = set()
    node_rename_dict = dict()
    tree = None  # parsed once, and only if some line needs a common ancestor
    with open(node_txt) as node_txt_handle:
        for each in node_txt_handle:
            each_split = each.strip().split('\t')
            node_str = each_split[0]

            # get internal_node_to_plot
            if ',' not in node_str:
                internal_node_to_plot = node_str
            else:
                if tree_str == '':
                    print('MCMCTree out file not found, program exited!')
                    raise SystemExit
                if tree is None:
                    tree = Tree(tree_str, format=1)
                internal_node_to_plot = tree.get_common_ancestor(node_str.split(',')).name

            # add internal_node_to_plot to node_set (blank lines give '' and are skipped)
            if internal_node_to_plot != '':
                node_set.add(internal_node_to_plot)

            # read in name to show in plot
            if (len(each_split) == 2) and (each_split[1] != ''):
                node_rename_dict[internal_node_to_plot] = each_split[1]

    return node_set, node_rename_dict, tree_str
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def read_in_posterior_mean(mcmctree_out):
    """Read posterior means and 95% HPD bounds from an MCMCTree output file.

    Only lines after the 'Posterior mean ...' header line are considered, and
    only those that split into exactly nine fields once brackets and commas
    are removed.

    Args:
        mcmctree_out: path to the MCMCTree output text file.

    Returns:
        dict mapping the first field of each accepted line (the node id) to
        [mean, HPD lower, HPD upper]; the values are kept as strings.
    """
    header = 'Posterior mean (95% Equal-tail CI) (95% HPD CI) HPD-CI-width'

    node_to_mean_hpd95_dict = dict()
    header_seen = False
    with open(mcmctree_out) as mcmctree_out_handle:  # closed even if parsing fails
        for each_line in mcmctree_out_handle:
            # whitespace-normalised comparison, so runs of spaces in the header do not matter
            if header in ' '.join(each_line.split()):
                header_seen = True
                continue
            if not header_seen:
                continue

            # drop empty tokens and lone '(' tokens, then strip brackets and commas from the rest
            fields = []
            for each_element in each_line.strip().split(' '):
                if each_element not in ('', '('):
                    fields.append(each_element.replace('(', '').replace(')', '').replace(',', ''))

            # fields: 0 node id, 1 mean, 2-3 equal-tail CI, 4-5 HPD CI, 6 width, 7-8 trailing label
            if len(fields) == 9:
                node_to_mean_hpd95_dict[fields[0]] = [fields[1], fields[4], fields[5]]

    return node_to_mean_hpd95_dict
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def VisHPD95(args):
    """Tabulate posterior mean ages with 95% HPD bounds for selected nodes and plot them via VisHPD95.R.

    Args:
        args: dict of options with the keys
            'i'      MCMCTree output file, or a folder searched for '*_out.txt' files
            'n'      node file or comma-separated node ids (see get_internal_node_to_plot)
            'label'  optional tab-separated file with three columns per line: output
                     file name, value for the 'Test' column, value for the 'Shape'
                     column; None to use the file name for both
            'o'      output plot file; the data table is written to '<o>.txt'
            'x','y'  plot width and height passed to the R script

    Raises:
        SystemExit: no MCMCTree output file was found, or the label file has a
            line that does not contain exactly three columns.
    """
    import subprocess  # local import: only needed for the Rscript call below

    mcmc_in = args['i']
    node_txt = args['n']
    label_txt = args['label']
    plot_out = args['o']
    plot_width = args['x']
    plot_height = args['y']

    # VisHPD95.R is looked up in the folder that holds this module
    VisHPD95_R = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'VisHPD95.R')

    dm_out = '%s.txt' % plot_out

    # check MCMCTree output file/dir
    if os.path.isfile(mcmc_in):
        mcmc_out_file_list = [mcmc_in]
    else:
        # sorted so the row order of the table does not depend on directory listing order
        mcmc_out_file_list = sorted(glob.glob('%s/*_out.txt' % mcmc_in))

    if len(mcmc_out_file_list) == 0:
        print('MCMCTree out file not found, program exited!')
        raise SystemExit

    # read in y-axis label file
    color_dict = dict()
    shape_dict = dict()
    if label_txt is not None:
        with open(label_txt) as label_txt_handle:
            for each_sample in label_txt_handle:
                if each_sample.strip() == '':
                    continue  # tolerate blank lines
                each_sample_split = each_sample.strip().split('\t')
                if len(each_sample_split) != 3:
                    print('Format error: %s' % label_txt)
                    raise SystemExit
                color_dict[each_sample_split[0]] = each_sample_split[1]
                shape_dict[each_sample_split[0]] = each_sample_split[2]

    with open(dm_out, 'w') as dm_out_handle:
        dm_out_handle.write('Test\tShape\tVar\tMean\tLow\tHigh\n')
        for mcmc_out_file in mcmc_out_file_list:
            mcmc_out_file_no_path = os.path.basename(mcmc_out_file)

            # fall back to the file name when no label was provided for this file
            color_col_to_write = color_dict.get(mcmc_out_file_no_path, mcmc_out_file_no_path)
            shape_col_to_write = shape_dict.get(mcmc_out_file_no_path, mcmc_out_file_no_path)
            node_set, node_rename_dict, tree_str = get_internal_node_to_plot(node_txt, mcmc_out_file)
            node_to_mean_95_hpd_dict = read_in_posterior_mean(mcmc_out_file)

            for each_node in node_set:
                mean_95_hpd_list = node_to_mean_95_hpd_dict.get(each_node)
                if mean_95_hpd_list is None:
                    # joining None used to raise TypeError; report the node and carry on
                    print('Node %s not found in %s, skipped!' % (each_node, mcmc_out_file))
                    continue
                node_name_to_write = node_rename_dict.get(each_node, each_node)
                dm_out_handle.write('%s\t%s\t%s\t%s\n' % (color_col_to_write, shape_col_to_write, node_name_to_write, '\t'.join(mean_95_hpd_list)))

    # argument list instead of a shell string: paths with spaces or shell characters are passed verbatim
    plot_cmd = ['Rscript', VisHPD95_R, '-i', dm_out, '-x', str(plot_width), '-y', str(plot_height), '-o', plot_out]
    try:
        plot_status = subprocess.run(plot_cmd).returncode
    except FileNotFoundError:
        print('Rscript not found, plot not generated!')
        return
    if plot_status != 0:
        print('Rscript exited with status %s, plot not generated!' % plot_status)
        return
    print('Plot exported to: %s' % plot_out)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# Command-line entry point: parse the options into a dict and run VisHPD95().
if __name__ == '__main__':

    VisHPD95_parser = argparse.ArgumentParser()
    VisHPD95_parser.add_argument('-i', required=True, help='mcmc.txt file or folder')
    VisHPD95_parser.add_argument('-n', required=True, help='Nodes to plot')
    VisHPD95_parser.add_argument('-label', required=False, default=None, help='labels on y axis')
    VisHPD95_parser.add_argument('-x', required=False, default=8,type=int, help='plot width, default: 8')
    VisHPD95_parser.add_argument('-y', required=False, default=5,type=int, help='plot height, default: 5')
    VisHPD95_parser.add_argument('-o', required=True, help='Output plot')
    # vars() turns the Namespace into a dict because VisHPD95() reads its options by key
    args = vars(VisHPD95_parser.parse_args())
    VisHPD95(args)

# Developer notes: an example invocation kept as a bare string expression (never assigned or used).
'''

cd /Users/songweizhi/Desktop/777
python3 ~/PycharmProjects/TreeSAK/TreeSAK/VisHPD95.py -i M1_mcmc_txt -o M1_HPD95.pdf -n nodes_five.txt -label y_label_out.txt

'''
|
TreeSAK/parse_reltime.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import argparse
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
parse_reltime_usage = '''
|
|
6
|
+
==================== parse_reltime example commands ====================
|
|
7
|
+
|
|
8
|
+
TreeSAK parse_reltime -i RelTime.txt -n dbscc_lca.txt -o dbscc_age.txt
|
|
9
|
+
|
|
10
|
+
========================================================================
|
|
11
|
+
'''
|
|
12
|
+
|
|
13
|
+
def sep_path_basename_ext(file_in):
    """Split a path into (file name, folder, base name, extension without its dot).

    The folder is reported as '.' when the path has no directory part.
    """
    folder, file_name = os.path.split(file_in)
    base_name, dotted_ext = os.path.splitext(file_name)
    folder = '.' if folder == '' else folder

    return file_name, folder, base_name, dotted_ext[1:]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_lca(reltime_txt, leaf_1_name, leaf_2_name):
    """Return the node id of the last common ancestor of two named nodes in a RelTime table.

    The table is tab-separated with the columns NodeLabel, NodeId, Des1, Des2
    first; the line starting with 'NodeLabel' is the header. Spaces inside
    node labels are replaced by underscores before names are compared.

    Args:
        reltime_txt: path to the RelTime output table.
        leaf_1_name, leaf_2_name: node labels (with underscores for spaces).

    Returns:
        The NodeId (string) of the closest shared ancestor, or '' if the two
        lineages never meet.

    Raises:
        KeyError: one of the names does not occur in the NodeLabel column.
    """
    child_to_parent_dict = dict()
    name_to_id_dict = dict()
    with open(reltime_txt) as reltime_handle:  # closed even if parsing fails
        for each_line in reltime_handle:
            if each_line.startswith('NodeLabel'):
                continue
            # only trailing whitespace is removed from the line: an empty leading
            # NodeLabel field must stay in place or every column would shift left
            each_line_split = [i.strip() for i in each_line.rstrip().split('\t')]
            if len(each_line_split) < 4:
                continue  # blank or truncated row
            node_name = each_line_split[0].replace(' ', '_')
            node_id, des1, des2 = each_line_split[1:4]
            name_to_id_dict[node_name] = node_id
            for each_des in (des1, des2):
                if each_des != '-':  # '-' means "no descendant", not a node id
                    child_to_parent_dict[each_des] = node_id

    def lineage_of(node_id):
        # node id followed by its ancestors, ordered from the node towards the root
        lineage_list = [node_id]
        while lineage_list[-1] in child_to_parent_dict:
            lineage_list.append(child_to_parent_dict[lineage_list[-1]])
        return lineage_list

    leaf_1_lineage = lineage_of(name_to_id_dict[leaf_1_name])
    leaf_2_lineage = set(lineage_of(name_to_id_dict[leaf_2_name]))

    # the first node on the way up from leaf 1 that is also above leaf 2 is the LCA
    for each_p in leaf_1_lineage:
        if each_p in leaf_2_lineage:
            return each_p
    return ''
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def parse_reltime(args):
    """Extract the RelTime rows of the nodes of interest and summarise their divergence times.

    Args:
        args: dict of options with the keys
            'i'  RelTime output table (tab-separated, header on the first line)
            'n'  tab-separated node file: column 1 holds two comma-separated leaf
                 names whose last common ancestor is wanted, optional column 2 a
                 description (defaults to column 1)
            'o'  summary output file

    Writes two files:
        '<o>'                     columns Node, DivTime, CI_Lower, CI_Upper (Node is the description)
        '<o base>_all_info.<ext>' the full RelTime rows, prefixed with the leaves and the description

    Raises:
        ValueError: a line of the node file does not name two leaves.
        KeyError: a leaf name does not occur in the RelTime table (from get_lca).
    """
    reltime_txt = args['i']
    interested_nodes_txt = args['n']
    op_txt = args['o']

    f_name, f_path, f_base, f_ext = sep_path_basename_ext(op_txt)
    op_txt_all_info = '%s/%s_all_info.%s' % (f_path, f_base, f_ext)

    lca_to_leaves_dict = dict()
    interested_node_desc_dict = dict()
    with open(interested_nodes_txt) as interested_nodes_handle:
        for interested_node in interested_nodes_handle:
            interested_node_split = interested_node.strip().split('\t')
            # one stripped key is used for both dicts, so the later lookup cannot miss
            paired_leaves = interested_node_split[0].strip()
            if paired_leaves == '':
                continue  # blank line
            leaf_list = [each_leaf.strip() for each_leaf in paired_leaves.split(',')]
            if len(leaf_list) < 2:
                raise ValueError('Two comma-separated leaves expected, got: %s' % paired_leaves)
            interested_node_desc = paired_leaves
            if len(interested_node_split) > 1:
                interested_node_desc = interested_node_split[1]
            interested_node_desc_dict[paired_leaves] = interested_node_desc
            lca_id = get_lca(reltime_txt, leaf_list[0], leaf_list[1])
            lca_to_leaves_dict[lca_id] = paired_leaves

    with open(reltime_txt) as reltime_handle, \
            open(op_txt_all_info, 'w') as op_txt_all_info_handle, \
            open(op_txt, 'w') as op_txt_handle:
        op_txt_handle.write('Node\tDivTime\tCI_Lower\tCI_Upper\n')
        for line_num_index, each_line in enumerate(reltime_handle):
            # keep an empty leading NodeLabel field in place (only trailing whitespace is removed)
            each_line_split = [i.strip() for i in each_line.rstrip().split('\t')]
            if line_num_index == 0:
                op_txt_all_info_handle.write('Leaves\tDescription\t%s\n' % ('\t'.join(each_line_split)))
                continue
            if len(each_line_split) < 2:
                continue
            node_id = each_line_split[1]
            if node_id not in lca_to_leaves_dict:
                continue
            corresponding_leaves = lca_to_leaves_dict[node_id]
            interested_node_desc = interested_node_desc_dict[corresponding_leaves]
            op_txt_all_info_handle.write('%s\t%s\t%s\n' % (corresponding_leaves, interested_node_desc, '\t'.join(each_line_split)))

            # table columns 7-9 (columns 9-11 of the all_info row) are written as DivTime, CI_Lower and CI_Upper
            if len(each_line_split) < 10:
                print('Row of node %s has fewer than 10 columns, not written to %s' % (node_id, op_txt))
                continue
            op_txt_handle.write('%s\t%s\t%s\t%s\n' % (interested_node_desc, each_line_split[7], each_line_split[8], each_line_split[9]))
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# Command-line entry point: parse the options into a dict and run parse_reltime().
if __name__ == '__main__':

    parse_reltime_parser = argparse.ArgumentParser()
    parse_reltime_parser.add_argument('-i', required=True, help='reltime output file')
    parse_reltime_parser.add_argument('-n', required=True, help='interested node txt')
    parse_reltime_parser.add_argument('-o', required=True, help='output txt file')
    # vars() turns the Namespace into a dict because parse_reltime() reads its options by key
    args = vars(parse_reltime_parser.parse_args())
    parse_reltime(args)


# Developer notes: example invocations kept as a bare string expression (never assigned or used).
'''

cd /Users/songweizhi/Desktop
python3 /Users/songweizhi/PycharmProjects/TreeSAK/TreeSAK/parse_reltime.py -i /Users/songweizhi/Desktop/Sponge_r220/6_dating/RelTime/topo2_p30_RelTime_JTT_Gamma4/topo2_p30_RelTime_Gamma4.txt -n yang_7.txt -o dbscc_age.txt

cd /Users/songweizhi/Desktop
TreeSAK parse_reltime -i /Users/songweizhi/Desktop/Sponge_r220/6_dating/RelTime/topo2_p30_RelTime_JTT_Gamma4/topo2_p30_RelTime_Gamma4.txt -n yang_7.txt -o dbscc_age.txt

'''
|
TreeSAK/phy2fa.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from Bio import SeqIO
|
|
3
|
+
from Bio import AlignIO
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
phy2fa_usage = '''
|
|
7
|
+
======= phy2fa example commands =======
|
|
8
|
+
|
|
9
|
+
TreeSAK phy2fa -i msa.phy -o msa.fa
|
|
10
|
+
|
|
11
|
+
=======================================
|
|
12
|
+
'''
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def phy2fa(args):
    """Convert a PHYLIP multiple sequence alignment into a FASTA file.

    Previously the alignment was only printed and the file given with '-o'
    was never written.

    Args:
        args: dict of options with the keys
            'i'  input MSA in phylip format
            'o'  output MSA in fasta format
    """
    phylip_in = args['i']
    fasta_out = args['o']

    # NOTE(review): "phylip" is Biopython's strict PHYLIP parser; files with long
    # (relaxed) names would need "phylip-relaxed" - confirm which inputs are expected.
    count = SeqIO.convert(phylip_in, "phylip", fasta_out, "fasta")
    print("Converted %i records" % count)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Command-line entry point: parse the options into a dict and run phy2fa().
if __name__ == '__main__':

    # initialize the options parser
    phy2fa_parser = argparse.ArgumentParser()
    phy2fa_parser.add_argument('-i', required=True, help='input MSA in phylip format')
    phy2fa_parser.add_argument('-o', required=True, help='output MSA in fasta format')
    # vars() turns the Namespace into a dict because phy2fa() reads its options by key
    args = vars(phy2fa_parser.parse_args())
    phy2fa(args)
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from tqdm import tqdm
|
|
4
|
+
from ete3 import Tree
|
|
5
|
+
from glob import glob
|
|
6
|
+
from os.path import *
|
|
7
|
+
import plotly.express as px
|
|
8
|
+
import plotly.graph_objects as go
|
|
9
|
+
import plotly.figure_factory as ff
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def read_mcmc(mcmc, all_col=False):
    """Load a tab-separated MCMC sample file into a DataFrame indexed by its first column.

    Args:
        mcmc: path to the file; anything that is not a string is returned
            unchanged (e.g. an already loaded DataFrame).
        all_col: when False (default), columns whose header starts with 'r_g'
            (the rate columns, according to the original comment) are dropped
            before parsing.

    Returns:
        pandas.DataFrame, or `mcmc` itself when it is not a string.
    """
    if not isinstance(mcmc, str):
        return mcmc
    if all_col:
        return pd.read_csv(mcmc, sep='\t', index_col=0)

    with open(mcmc) as mcmc_handle:  # closed even if parsing fails
        header = next(mcmc_handle).strip().split('\t')
        # one keep/drop flag per column position
        keep_col = [not col_name.startswith('r_g') for col_name in header]
        kept_lines = ['\t'.join(h for h, keep in zip(header, keep_col) if keep)]
        # filter row by row and join once at the end (no quadratic string building)
        for row in mcmc_handle:
            kept_lines.append('\t'.join(r for r, keep in zip(row.strip().split('\t'), keep_col) if keep))

    text = '\n'.join(kept_lines) + '\n'
    return pd.read_csv(io.StringIO(text), sep='\t', index_col=0)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def get_node_name_from_log(f):
    """Build an ete3 tree from a log file and name its unnamed internal nodes.

    The log must contain a row that is exactly 'Species tree'. The node table
    starts three rows below it and ends at the first empty row; the tree that
    is parsed (ete3 ``format=8``) is the second row after that empty row.

    Args:
        f: path to the log file (the original comment says the *.log file).

    Returns:
        The ete3 Tree, or None (after printing "prior not complete") when the
        'Species tree' section is missing or cut short.
    """
    with open(f) as log_handle:  # closed as soon as the text is read
        rows = log_handle.read().split('\n')

    if 'Species tree' not in rows:
        print("prior not complete")
        return
    idx = rows.index('Species tree')
    start_idx = idx + 3

    # the first empty row after the section title ends the node table
    try:
        end_idx = rows.index('', idx)
    except ValueError:
        print("prior not complete")
        return
    tree_idx2 = end_idx + 2
    if tree_idx2 >= len(rows):
        print("prior not complete")
        return

    # table fields: father, node number, name; rows with exactly four fields are keyed by name
    n2father = {}
    for i in range(start_idx, end_idx):
        row = [_ for _ in rows[i].split(' ') if _]
        father, n, name = row[0], row[1], row[2]
        n2father[name if len(row) == 4 else n] = father

    t = Tree(rows[tree_idx2], format=8)
    for l in t.traverse('postorder'):
        if l.up is None:  # the root has no parent to name
            break
        if not l.up.name:
            l.up.name = n2father[l.name]
    return t
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# Flat plotting script: everything below runs when the module is executed or imported.
# Input and output folders are hard-coded absolute paths.
indir = '/Users/songweizhi/Desktop/DateArTree/plot_distruibution/stepwise'
tree_dir = '/Users/songweizhi/Desktop/DateArTree/plot_distruibution/treefile_dir'
plot_dir = '/Users/songweizhi/Desktop/DateArTree/plot_distruibution'

# Names of the two gene sets and the gene ids that belong to each of them.
gene_names = ['M24', 'COG25']
M24_gene_list = ['MitoCOG0043', 'MitoCOG0040', 'MitoCOG0055', 'MitoCOG0052', 'MitoCOG0053', 'MitoCOG0133', 'MitoCOG0008', 'MitoCOG0009', 'MitoCOG0027', 'MitoCOG0031', 'MitoCOG0030', 'MitoCOG0001', 'MitoCOG0003', 'MitoCOG0012', 'MitoCOG0010', 'MitoCOG0004', 'MitoCOG0005', 'MitoCOG0011', 'MitoCOG0039', 'MitoCOG0060', 'MitoCOG0071', 'MitoCOG0059', 'MitoCOG0067', 'MitoCOG0066']
COG25_gene_list = ['223163', '223176', '223175', '223607', '223159', '223165', '223170', '223164', '223158', '223172', '223128', '223665', '223275', '223328', '223280', '223127', '223279', '273102', '223130', '223181', '223180', '223168', '223178', '223596', '223556']


setname2genes = dict()
setname2genes['M24'] = M24_gene_list
setname2genes['COG25'] = COG25_gene_list


# Per gene: a value read from the '<gene>.iqtree' report (gene2dl) and the number of leaves of '<gene>.treefile' (gene2num).
gene2num = {}
gene2dl = {}
for gene_id in (M24_gene_list + COG25_gene_list):
    pwd_tree_file = '%s/%s.treefile' % (tree_dir, gene_id)
    pwd_iqtree_log = '%s/%s.iqtree' % (tree_dir, gene_id)
    # NOTE(review): this file handle is never closed explicitly.
    rows = open(pwd_iqtree_log).read().strip().split("\n")
    # First row containing the header text; IndexError if the report has no such row.
    # NOTE(review): the literal must match the spacing of the report header exactly - confirm.
    idx = [idx for idx, v in enumerate(rows) if "deltaL bp-RELL" in v][0]
    # The two rows that sit 2 and 3 lines below the header row.
    r1, r2 = rows[idx + 2], rows[idx + 3]
    r1 = [_ for _ in r1.strip().split(" ") if _]
    r2 = [_ for _ in r2.strip().split(" ") if _]
    # Use the third field of the second row unless it is the string "0"; then take the first row's.
    if r2[2] == "0":
        gene2dl[gene_id] = float(r1[2])
    else:
        gene2dl[gene_id] = float(r2[2])
    gene2num[gene_id] = len(Tree(pwd_tree_file).get_leaf_names())

# plot 1
# One bar chart per gene set: the gene2dl values of its genes in descending order.
for setname, genes in setname2genes.items():
    dl_list = [gene2dl[_] for _ in genes]
    dl_list = sorted(dl_list, reverse=True)
    fig = go.Figure()
    fig.add_bar(y=dl_list)
    fig.update_layout(title_text=setname,title_x=0.5,title_y=1,width=700,height=100,template='simple_white',
                      margin_b=10,margin_l=10,margin_r=10,margin_t=10)
    fig.write_image('%s/Plot_1_%s.pdf' % (plot_dir, setname))

for gene_set in gene_names:
    for _model in ['LG']: # C60
        # Collect (label, mcmc.txt path) pairs for runs that also have a FigTree.tre next to mcmc.txt.
        t = []
        for f in glob(f'{indir}/{gene_set}/r*/1pf_{_model}/mcmctree/mcmc.txt'):
            if exists(f.replace('mcmc.txt', 'FigTree.tre')):
                # f.split('/')[-4] is the 'r*' folder name matched by the glob pattern
                t.append((f.split('/')[-4] + ' MCMC', f))
        # Order the runs by the integer that follows the first character of the folder name.
        t = sorted(t, key=lambda x: int(x[0].split(' ')[0][1:]))

        dfs = []
        targets = []
        for cal, mcmc in tqdm(t):
            tre = get_node_name_from_log(mcmc.replace('mcmc.txt','03_mcmctree.log'))
            df = read_mcmc(mcmc)
            # Down-sample to 5000 rows; if sampling raises, the path is printed and the full table is kept.
            # NOTE(review): bare except - it also hides unrelated errors.
            try:
                df = df.sample(5000)
            except:
                print(mcmc)
            # Each entry: two leaves whose common ancestor is looked up in the tree, plus a group name.
            for lca, name in [('GCA_001828545.1,GCA_005524015.1', 'Anammox'), ('GCA_013697045.1,GCA_002356115.1', 'Gamma-AOB'),
                              ('GCA_001772005.1,GCA_013521015.1', 'Beta-AOB'), ('GCA_017879665.1,GCA_013140535.1', 'Comammox'),
                              ('Acanthamoeba_castellanii,Andalucia_godoyi', 'Euk'), ('Andalucia_godoyi,Ostreococcus_tauri', 'Euk'),
                              ('Cyanophora_paradoxa,NC_002186.1', 'Euk')]:
                # Any failure here (tre is None, leaves not in the tree, column missing) skips this group.
                # NOTE(review): bare except - it also hides unrelated errors.
                try:
                    n = tre.get_common_ancestor(lca.split(',')).name
                    targets.append(str(n))
                    n = 't_n' + str(n)
                    times = df[[n]]
                except:
                    continue

                times.columns = ['time']
                times.loc[:, 'group name'] = name
                times.loc[:, 'cal'] = cal
                dfs.append(times)

        # plot 2
        # Horizontal violin plot of the sampled times per run, restricted to the four groups in g2color.
        _df = pd.concat(dfs, axis=0)
        g2color = {"Gamma-AOB": "#78fce0", "Beta-AOB": "#956bb4", "Comammox": "#edc21a", "Anammox": "#ff8000"}
        _df = _df.loc[_df["group name"].isin(list(g2color)), :]
        _fig = px.violin( _df, y="cal", x="time", color="group name", color_discrete_map=g2color, points=False, orientation="h")
        _fig.update_traces(side="positive", fillcolor='rgba(0,0,0,0)', width=1.8)
        _fig.update_traces(showlegend=False)
        num_y = len(_df["cal"].unique())
        _fig.layout.template = "simple_white"
        _fig.layout.width = 700
        _fig.layout.height = 750
        _fig.update_xaxes(range=[40, 0])
        _fig.update_layout(margin_t=10, title_text=f'{gene_set} {_model}', title_x=0.5)
        _fig.write_image(f'{plot_dir}/Plot_2_{gene_set}_gradient_{_model}.pdf')

        # plot 3
        # Per run: median 'Anammox' time minus median 'Gamma-AOB' time, plotted against the run number.
        xs = []
        ys = []
        for ng, subdf in sorted(_df.groupby('cal'),key=lambda x: int(x[0].split(' ')[0].replace('r', ''))):
            t1 = subdf.loc[subdf['group name'] == 'Gamma-AOB', 'time'].median()
            t2 = subdf.loc[subdf['group name'] == 'Anammox', 'time'].median()
            deltaT = t2-t1
            ys.append(deltaT)
            xs.append(int(ng.split(' ')[0].replace('r', '')))
        fig = go.Figure()
        fig.add_scatter(x=xs, y=ys, mode='markers+lines', showlegend=False)
        fig.update_layout(width=300, height=300, margin_t=30, margin_l=10, margin_b=10, margin_r=10,
                          template='simple_white', title_text=f'{gene_set} {_model}', title_x=0.5)
        fig.write_image('%s/Plot_3_%s_%s.pdf' % (plot_dir, gene_set, _model))
|
|
165
|
+
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import itertools
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def prep_mcmctree_ctl(ctl_para_dict, mcmctree_ctl_file):
    """Write an MCMCTree control file, one 'name = value' line per setting.

    Args:
        ctl_para_dict: settings by name. 'seqfile', 'treefile', 'mcmcfile',
            'outfile', 'seqtype', 'usedata' and 'clock' are required; every
            other setting falls back to the default listed below.
        mcmctree_ctl_file: path of the control file to write.

    Raises:
        KeyError: a required setting is missing (raised before the file is created).
    """
    required = object()  # marks settings that have no default

    # (setting name, default) in the order the lines are written
    ctl_settings = [
        ('seed', '-1'),  # the seed line was previously written under the label 'finetune'
        ('seqfile', required),
        ('treefile', required),
        ('mcmcfile', required),
        ('outfile', required),
        ('ndata', 1),
        ('seqtype', required),
        ('usedata', required),
        ('clock', required),
        ('RootAge', '<1.0'),
        ('model', 0),
        ('alpha', 0.5),
        ('ncatG', 4),
        ('cleandata', 0),
        ('BDparas', '1 1 0.1'),
        ('kappa_gamma', '6 2'),
        ('alpha_gamma', '1 1'),
        ('rgene_gamma', '1 50 1'),
        ('sigma2_gamma', '1 10 1'),
        ('finetune', '1: .1 .1 .1 .1 .1 .1'),
        ('print', 1),
        ('burnin', 50000),
        ('sampfreq', 5),
        ('nsample', 50000),
    ]

    # build every line first, so a missing required setting leaves no partial file behind
    ctl_lines = []
    for setting_name, default_value in ctl_settings:
        if default_value is required:
            setting_value = ctl_para_dict[setting_name]
        else:
            setting_value = ctl_para_dict.get(setting_name, default_value)
        ctl_lines.append(' %s = %s\n' % (setting_name, setting_value))

    with open(mcmctree_ctl_file, 'w') as ctl_file_handle:
        ctl_file_handle.writelines(ctl_lines)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Example settings: exactly the keys that prep_mcmctree_ctl() reads without a default.
mcmctree_ctl_dict = {'seqfile' : 'concatenated.phy',
                     'treefile': 'deltall75_pa75_rooted_with_calibrations.nwk',
                     'mcmcfile': 'mcmc.txt',
                     'outfile' : 'DateArTree_out.txt',
                     'seqtype' : 2,
                     'usedata' : 3,
                     'clock' : 3}


# NOTE(review): module-level call - it runs whenever this module is executed or imported
# and writes to a hard-coded absolute path.
prep_mcmctree_ctl(mcmctree_ctl_dict, '/Users/songweizhi/Desktop/aaa.txt')
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_parameter_combinations(para_to_test_dict):
    """Enumerate every combination of the settings to test.

    Parameters are taken in sorted name order and the settings of each
    parameter in sorted order. Each combination is keyed by its
    '<name><value>' labels (spaces replaced by '_') joined with '_'.

    Args:
        para_to_test_dict: parameter name -> list of settings to try.

    Returns:
        dict mapping each combination label to a {parameter name: setting} dict.
    """
    # one list of (name, setting, label) triples per parameter
    choices_per_para = []
    for para_name in sorted(para_to_test_dict):
        choices_per_para.append([
            (para_name, setting, ('%s%s' % (para_name, setting)).replace(' ', '_'))
            for setting in sorted(para_to_test_dict[para_name])
        ])

    para_dod = {}
    for combination in itertools.product(*choices_per_para):
        combination_label = '_'.join(label for _, _, label in combination)
        para_dod[combination_label] = {name: setting for name, setting, _ in combination}

    return para_dod
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# Demo run of get_parameter_combinations(): four parameters with two settings each; the result is printed.
# NOTE(review): module-level code - it runs whenever this module is executed or imported.
para_to_test_dict = {'clock': [2, 3], 'nsample': [20000, 50000], 'model': [0, 4], 'kappa_gamma': ['6 2', '5 1']}
para_dod = get_parameter_combinations(para_to_test_dict)
print(para_dod)

# all_combination_list_in_str = ['_'.join(i) for i in all_combination_list]
# print(all_combination_list_in_str)
# print(len(all_combination_list_in_str))
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
|
TreeSAK/print_leaves.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from ete3 import Tree
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
print_leaves_usage = '''
|
|
6
|
+
======= print_leaves example commands =======
|
|
7
|
+
|
|
8
|
+
TreeSAK print_leaves -i in.tree
|
|
9
|
+
|
|
10
|
+
=============================================
|
|
11
|
+
'''
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def print_leaves(args):
    """Print the leaf names of a tree in sorted order, one name per line.

    Args:
        args: dict of options; 'i' is the input tree file (read with ete3 ``format=1``).
    """
    input_tree = Tree(args['i'], format=1)

    # iterating over the tree visits its leaves
    sorted_names = [each_leaf.name for each_leaf in input_tree]
    sorted_names.sort()

    print('\n'.join(sorted_names))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Command-line entry point: parse the options into a dict and run print_leaves().
if __name__ == '__main__':

    # initialize the options parser
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', required=True, help='input tree file')
    # vars() turns the Namespace into a dict because print_leaves() reads its options by key
    args = vars(parser.parse_args())
    print_leaves(args)
|