treesak 1.51.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of treesak might be problematic. Click here for more details.
- TreeSAK/ALE.py +63 -0
- TreeSAK/ALE1.py +268 -0
- TreeSAK/ALE2.py +168 -0
- TreeSAK/ALE2RTC.py +30 -0
- TreeSAK/ALE3.py +205 -0
- TreeSAK/ALE4.py +636 -0
- TreeSAK/ALE5.py +210 -0
- TreeSAK/ALE6.py +401 -0
- TreeSAK/ALE7.py +126 -0
- TreeSAK/ALE_backup.py +1081 -0
- TreeSAK/AssessCVG.py +128 -0
- TreeSAK/AssessMarker.py +306 -0
- TreeSAK/AssessMarkerDeltaLL.py +257 -0
- TreeSAK/AssessMarkerPA.py +317 -0
- TreeSAK/AssessPB.py +130 -0
- TreeSAK/BMGE.jar +0 -0
- TreeSAK/BMGE.py +49 -0
- TreeSAK/CompareMCMC.py +138 -0
- TreeSAK/ConcateMSA.py +111 -0
- TreeSAK/ConvertMSA.py +135 -0
- TreeSAK/Dir.rb +82 -0
- TreeSAK/ExtractMarkerSeq.py +263 -0
- TreeSAK/FastRoot.py +1175 -0
- TreeSAK/FastRoot_backup.py +1122 -0
- TreeSAK/FigTree.py +34 -0
- TreeSAK/GTDB_tree.py +76 -0
- TreeSAK/GeneTree.py +142 -0
- TreeSAK/KEGG_Luo17.py +807 -0
- TreeSAK/LcaToLeaves.py +66 -0
- TreeSAK/MarkerRef2Tree.py +616 -0
- TreeSAK/MarkerRef2Tree_backup.py +628 -0
- TreeSAK/MarkerSeq2Tree.py +290 -0
- TreeSAK/MarkerSeq2Tree_backup.py +259 -0
- TreeSAK/ModifyTopo.py +116 -0
- TreeSAK/Newick_tree_plotter.py +79 -0
- TreeSAK/OMA.py +170 -0
- TreeSAK/OMA2.py +212 -0
- TreeSAK/OneLineAln.py +50 -0
- TreeSAK/PB.py +155 -0
- TreeSAK/PMSF.py +106 -0
- TreeSAK/PhyloBiAssoc.R +84 -0
- TreeSAK/PhyloBiAssoc.py +167 -0
- TreeSAK/PlotMCMC.py +41 -0
- TreeSAK/PlotMcmcNode.py +152 -0
- TreeSAK/PlotMcmcNode_old.py +252 -0
- TreeSAK/RootTree.py +101 -0
- TreeSAK/RootTreeGTDB214.py +288 -0
- TreeSAK/RootTreeGTDB220.py +300 -0
- TreeSAK/RootTreeGTDB226.py +300 -0
- TreeSAK/SequentialDating.py +16 -0
- TreeSAK/SingleAleHGT.py +157 -0
- TreeSAK/SingleLinePhy.py +50 -0
- TreeSAK/SliceMSA.py +142 -0
- TreeSAK/SplitScore.py +19 -0
- TreeSAK/SplitScore1.py +178 -0
- TreeSAK/SplitScore1OMA.py +148 -0
- TreeSAK/SplitScore2.py +597 -0
- TreeSAK/TaxaCountStats.R +256 -0
- TreeSAK/TaxonTree.py +47 -0
- TreeSAK/TreeSAK_config.py +32 -0
- TreeSAK/VERSION +158 -0
- TreeSAK/VisHPD95.R +45 -0
- TreeSAK/VisHPD95.py +200 -0
- TreeSAK/__init__.py +0 -0
- TreeSAK/ale_parser.py +74 -0
- TreeSAK/ale_splitter.py +63 -0
- TreeSAK/alignment_pruner.pl +1471 -0
- TreeSAK/assessOG.py +45 -0
- TreeSAK/catfasta2phy.py +140 -0
- TreeSAK/cogTree.py +185 -0
- TreeSAK/compare_trees.R +30 -0
- TreeSAK/compare_trees.py +255 -0
- TreeSAK/dating.py +264 -0
- TreeSAK/dating_ss.py +361 -0
- TreeSAK/deltall.py +82 -0
- TreeSAK/do_rrtc.rb +464 -0
- TreeSAK/fa2phy.py +42 -0
- TreeSAK/format_leaf_name.py +70 -0
- TreeSAK/gap_stats.py +38 -0
- TreeSAK/get_SCG_tree.py +742 -0
- TreeSAK/get_arCOG_seq.py +97 -0
- TreeSAK/global_functions.py +222 -0
- TreeSAK/gnm_leaves.py +43 -0
- TreeSAK/iTOL.py +791 -0
- TreeSAK/iTOL_gene_tree.py +80 -0
- TreeSAK/itol_msa_stats.py +56 -0
- TreeSAK/keep_highest_rrtc.py +37 -0
- TreeSAK/koTree.py +194 -0
- TreeSAK/label_tree.R +75 -0
- TreeSAK/label_tree.py +121 -0
- TreeSAK/mad.py +708 -0
- TreeSAK/mcmc2tree.py +58 -0
- TreeSAK/mcmcTC copy.py +92 -0
- TreeSAK/mcmcTC.py +104 -0
- TreeSAK/mcmctree_vs_reltime.R +44 -0
- TreeSAK/mcmctree_vs_reltime.py +252 -0
- TreeSAK/merge_pdf.py +32 -0
- TreeSAK/pRTC.py +56 -0
- TreeSAK/parse_mcmctree.py +198 -0
- TreeSAK/parse_reltime.py +141 -0
- TreeSAK/phy2fa.py +37 -0
- TreeSAK/plot_distruibution_th.py +165 -0
- TreeSAK/prep_mcmctree_ctl.py +92 -0
- TreeSAK/print_leaves.py +32 -0
- TreeSAK/pruneMSA.py +63 -0
- TreeSAK/recode.py +73 -0
- TreeSAK/remove_bias.R +112 -0
- TreeSAK/rename_leaves.py +77 -0
- TreeSAK/replace_clade.py +55 -0
- TreeSAK/root_with_out_group.py +84 -0
- TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
- TreeSAK/subsample_drep_gnms.py +74 -0
- TreeSAK/subset.py +69 -0
- TreeSAK/subset_tree_stupid_old_way.py +193 -0
- TreeSAK/supertree.py +330 -0
- TreeSAK/tmp_1.py +19 -0
- TreeSAK/tmp_2.py +19 -0
- TreeSAK/tmp_3.py +120 -0
- TreeSAK/weighted_rand.rb +23 -0
- treesak-1.51.2.data/scripts/TreeSAK +950 -0
- treesak-1.51.2.dist-info/LICENSE +674 -0
- treesak-1.51.2.dist-info/METADATA +27 -0
- treesak-1.51.2.dist-info/RECORD +125 -0
- treesak-1.51.2.dist-info/WHEEL +5 -0
- treesak-1.51.2.dist-info/top_level.txt +1 -0
TreeSAK/mcmc2tree.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import argparse
|
|
3
|
+
from ete3 import Tree
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
mcmc2tree_usage = '''
|
|
7
|
+
============ mcmc2tree example commands ============
|
|
8
|
+
|
|
9
|
+
TreeSAK mcmc2tree -i mcmctree.out -o renamed.tree
|
|
10
|
+
|
|
11
|
+
====================================================
|
|
12
|
+
'''
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def mcmc2tree(args):
    """Extract the FigTree species tree from an MCMCTree output file and save it with renamed nodes.

    Args:
        args: dict with 'i' (path to the MCMCTree .out file) and 'o' (path of
            the tree file to write).

    The tree is the line that directly follows the 'Species tree for FigTree'
    header line. Leaf names lose their first underscore-separated field,
    internal node names get a 't_n' prefix, and the tree is written with
    ete3 format 8 followed by a newline.

    The program exits after printing a message when the input file does not
    exist or no tree line follows the header.
    """
    mamctree_out = args['i']
    tree_file = args['o']

    if os.path.isfile(mamctree_out) is False:
        print('%s not found, program exited!' % mamctree_out)
        # the message promises an exit; without it open() below fails with a traceback
        exit()

    # The header is compared after collapsing runs of whitespace, so the exact
    # spacing used in the .out file does not matter.
    header = 'Species tree for FigTree. Branch lengths = posterior mean times; 95% CIs = labels'

    # get tree string from mcmctree_out (the line right after the header line)
    tree_str = ''
    previous_was_header = False
    with open(mamctree_out) as mcmctree_out_handle:
        for each_line in mcmctree_out_handle:
            is_header = header in ' '.join(each_line.split())
            if previous_was_header and not is_header:
                tree_str = each_line.strip()
            previous_was_header = is_header

    if tree_str == '':
        print('No species tree found in %s, program exited!' % mamctree_out)
        exit()

    tree_str_no_space = tree_str.replace(' ', '')

    # rename tree nodes
    t = Tree(tree_str_no_space, format=1)
    for each_node in t.traverse():
        if each_node.is_leaf():
            # drop the first underscore-separated field of the leaf name
            each_node.name = '_'.join(each_node.name.split('_')[1:])
        else:
            each_node.name = 't_n%s' % each_node.name

    with open(tree_file, 'w') as tree_file_handle:
        tree_file_handle.write(t.write(format=8) + '\n')
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
if __name__ == '__main__':

    # command-line entry point: collect -i / -o and pass them to mcmc2tree as a dict
    cli_parser = argparse.ArgumentParser()
    for flag, help_text in (('-i', 'the .out file from mcmctree'),
                            ('-o', 'output tree file')):
        cli_parser.add_argument(flag, required=True, help=help_text)
    mcmc2tree(vars(cli_parser.parse_args()))
|
TreeSAK/mcmcTC copy.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from ete3 import Tree
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
mcmcTC_usage = '''
|
|
6
|
+
===================== mcmcTC example commands =====================
|
|
7
|
+
|
|
8
|
+
TreeSAK mcmcTC -i in.tree -o out.tree -tc time_constraints.txt
|
|
9
|
+
|
|
10
|
+
# Format of constraint file (tab separated columns)
|
|
11
|
+
IMG2264867070_yang,GCF900696045_1_yang 3.46-4.38 Archaeal root
|
|
12
|
+
GCF000015225_1_yang,GCF000007225_1_yang -2.32 Oxygen Age Constraint, Thermoproteales
|
|
13
|
+
GCF000213215_1_yang,GCA000024305_1_yang -2.32 Oxygen Age Constraint, Sulfolobales
|
|
14
|
+
GCF000152265_2_yang,GCF000195915_1_yang -2.32 Oxygen Age Constraint, Thermoplasma
|
|
15
|
+
GCF000376445_1_yang,GCF000172995_2_yang -1.579 Chitin Age Constraint, Halobacteriales
|
|
16
|
+
GCF000195935_2_yang,GCF000151105_2_yang -1.579 Chitin Age Constraint, Thermococcales
|
|
17
|
+
GCA000802205_2_yang,GCA000200715_1 0.75-1.49 HGT from Viridiplantae to Thaumarchaeota
|
|
18
|
+
|
|
19
|
+
===================================================================
|
|
20
|
+
'''
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def mcmcTC(args):
    """Name tree nodes after their time constraints and write the tree with a leaf-count header.

    Args:
        args: dict with 'i' (input tree file), 'tc' (tab-separated constraint
            file) and 'o' (output tree file).

    Each constraint line holds comma-separated leaf ids in column 1 and an
    age in column 2. An age '-X' becomes '<X', 'X-' becomes '>X' and 'X-Y'
    becomes '>X<Y'; that string is assigned as the name of the last common
    ancestor of the listed leaves and is single-quoted in the output. Blank
    lines are ignored. Lines with fewer than two columns, or with an age
    that contains no '-', are reported and the program exits.

    The output file starts with '<number of leaves><TAB>1' followed by the
    tree string.
    """
    import re  # only needed here; keeps the module-level imports untouched

    tree_file_in = args['i']
    time_constraint_txt = args['tc']
    tree_file_out = args['o']

    constraint_set = set()
    constraint_dict = dict()
    not_recognizable_time_constraint_set = set()
    with open(time_constraint_txt) as constraint_handle:
        for each_constraint in constraint_handle:
            each_constraint = each_constraint.strip()
            if each_constraint == '':
                continue
            each_constraint_split = each_constraint.split('\t')
            if len(each_constraint_split) < 2:
                # no age column: report the whole line instead of raising IndexError
                not_recognizable_time_constraint_set.add(each_constraint)
                continue
            leaf_ids = each_constraint_split[0]
            provided_age = each_constraint_split[1]

            if provided_age.startswith('-'):
                str_to_add = '<%s' % provided_age[1:]
            elif provided_age.endswith('-'):
                str_to_add = '>%s' % provided_age[:-1]
            elif '-' in provided_age:
                provided_age_split = provided_age.split('-')
                str_to_add = '>%s<%s' % (provided_age_split[0], provided_age_split[1])
            else:
                not_recognizable_time_constraint_set.add(provided_age)
                continue

            constraint_dict[leaf_ids] = str_to_add
            constraint_set.add(str_to_add)

    if len(not_recognizable_time_constraint_set) > 0:
        print('Format of the following time constraints are not recognizable, program exited')
        print(','.join(not_recognizable_time_constraint_set))
        exit()

    # read in tree
    tree_in = Tree(tree_file_in, quoted_node_names=True, format=1)

    # add time constraints as node name
    for each_node in constraint_dict:
        node_age = constraint_dict[each_node]
        current_lca = tree_in.get_common_ancestor(each_node.split(','))
        current_lca.add_features(custom_label=node_age)
        current_lca.name = node_age

    tree_out_str = tree_in.write(format=1)

    # remove branch lengths that are exactly 1; a plain replace(':1', '')
    # would also cut into lengths such as ':1.5' or ':10'
    tree_out_str = re.sub(r':1(?=[,);])', '', tree_out_str)

    # quote constraint strings in a single pass, longest first, so that a
    # constraint contained in another one (e.g. '<2.32' in '>1<2.32') is not
    # quoted a second time inside it
    if constraint_set:
        constraint_pattern = '|'.join(re.escape(i) for i in sorted(constraint_set, key=len, reverse=True))
        tree_out_str = re.sub(constraint_pattern, lambda m: "'%s'" % m.group(0), tree_out_str)

    # drop the first character and turn ');' into ';'
    tree_out_str = tree_out_str[1:].replace(');', ';')

    # write tree to file
    with open(tree_file_out, 'w') as tree_file_out_handle:
        tree_file_out_handle.write('%s\t1\n' % len(tree_in.get_leaves()))
        tree_file_out_handle.write(tree_out_str)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
if __name__ == '__main__':

    # command-line entry point: gather the three required paths and run mcmcTC
    cli_parser = argparse.ArgumentParser()
    for flag, help_text in (('-i', 'input tree'),
                            ('-o', 'output tree'),
                            ('-tc', 'time constraint file')):
        cli_parser.add_argument(flag, required=True, help=help_text)
    mcmcTC(vars(cli_parser.parse_args()))
|
TreeSAK/mcmcTC.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
from ete3 import Tree
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
mcmcTC_usage = '''
|
|
6
|
+
======================================= mcmcTC example commands =======================================
|
|
7
|
+
|
|
8
|
+
TreeSAK mcmcTC -i in.tree -o out.tree -tc time_constraints.txt
|
|
9
|
+
|
|
10
|
+
# Format of constraint file (tab separated columns)
|
|
11
|
+
IMG2264867070_yang,GCF900696045_1_yang lca 3.46-4.38 Archaeal root
|
|
12
|
+
GCF000015225_1_yang,GCF000007225_1_yang lca -2.32 Oxygen Age Constraint, Thermoproteales
|
|
13
|
+
GCF000213215_1_yang,GCA000024305_1_yang lca -2.32 Oxygen Age Constraint, Sulfolobales
|
|
14
|
+
GCF000152265_2_yang,GCF000195915_1_yang lca -2.32 Oxygen Age Constraint, Thermoplasma
|
|
15
|
+
GCF000376445_1_yang,GCF000172995_2_yang lca -1.579 Chitin Age Constraint, Halobacteriales
|
|
16
|
+
GCF000195935_2_yang,GCF000151105_2_yang lca -1.579 Chitin Age Constraint, Thermococcales
|
|
17
|
+
GCA000802205_2_yang,GCF000303155_1_yang lca_p 0.75-1.49 HGT from Viridiplantae to Thaumarchaeota
|
|
18
|
+
|
|
19
|
+
# lca: last common ancestor
|
|
20
|
+
# lca_p: parent of last common ancestor
|
|
21
|
+
|
|
22
|
+
=======================================================================================================
|
|
23
|
+
'''
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def mcmcTC(args):
    """Name tree nodes after their time constraints and write the tree with a leaf-count header.

    Args:
        args: dict with 'i' (input tree file), 'tc' (tab-separated constraint
            file) and 'o' (output tree file).

    Each constraint line holds comma-separated leaf ids (column 1), a
    constraint type (column 2) and an age (column 3). An age '-X' becomes
    '<X', 'X-' becomes '>X' and 'X-Y' becomes '>X<Y'. With type 'lca' the
    string is assigned as the name of the last common ancestor of the listed
    leaves; with 'lca_p' it is assigned to the parent of that ancestor. The
    string is single-quoted in the output.

    Blank lines are ignored. Lines with fewer than three columns, or with an
    age that contains no '-', are reported and the program exits. The program
    also exits when 'lca_p' is requested for a node without a parent. Any
    other constraint type is reported and that constraint is not applied.

    The output file starts with '<number of leaves><TAB>1' followed by the
    tree string.
    """
    import re  # only needed here; keeps the module-level imports untouched

    tree_file_in = args['i']
    time_constraint_txt = args['tc']
    tree_file_out = args['o']

    constraint_set = set()
    constraint_dict = dict()
    constraint_type_dict = dict()
    not_recognizable_time_constraint_set = set()
    with open(time_constraint_txt) as constraint_handle:
        for each_constraint in constraint_handle:
            each_constraint = each_constraint.strip()
            if each_constraint == '':
                continue
            each_constraint_split = each_constraint.split('\t')
            if len(each_constraint_split) < 3:
                # type or age column missing: report the whole line instead of raising IndexError
                not_recognizable_time_constraint_set.add(each_constraint)
                continue
            leaf_ids = each_constraint_split[0]
            constraint_type = each_constraint_split[1]
            provided_age = each_constraint_split[2]

            if provided_age.startswith('-'):
                str_to_add = '<%s' % provided_age[1:]
            elif provided_age.endswith('-'):
                str_to_add = '>%s' % provided_age[:-1]
            elif '-' in provided_age:
                provided_age_split = provided_age.split('-')
                str_to_add = '>%s<%s' % (provided_age_split[0], provided_age_split[1])
            else:
                not_recognizable_time_constraint_set.add(provided_age)
                continue

            constraint_set.add(str_to_add)
            constraint_dict[leaf_ids] = str_to_add
            constraint_type_dict[leaf_ids] = constraint_type

    if len(not_recognizable_time_constraint_set) > 0:
        print('Format of the following time constraints are not recognizable, program exited')
        print(','.join(not_recognizable_time_constraint_set))
        exit()

    # read in tree
    tree_in = Tree(tree_file_in, quoted_node_names=True, format=1)

    # add time constraints as node name
    for each_node in constraint_dict:
        node_age = constraint_dict[each_node]
        constraint_type = constraint_type_dict[each_node]
        current_lca = tree_in.get_common_ancestor(each_node.split(','))
        if constraint_type == 'lca':
            node_to_label = current_lca
        elif constraint_type == 'lca_p':
            node_to_label = current_lca.up
            if node_to_label is None:
                # nothing above this node to carry the constraint
                print('The LCA of %s has no parent node, lca_p can not be applied, program exited' % each_node)
                exit()
        else:
            print('Constraint type "%s" (%s) is neither lca nor lca_p, constraint ignored' % (constraint_type, each_node))
            continue
        node_to_label.add_features(custom_label=node_age)
        node_to_label.name = node_age

    tree_out_str = tree_in.write(format=1)

    # remove branch lengths that are exactly 1; a plain replace(':1', '')
    # would also cut into lengths such as ':1.5' or ':10'
    tree_out_str = re.sub(r':1(?=[,);])', '', tree_out_str)

    # quote constraint strings in a single pass, longest first, so that a
    # constraint contained in another one (e.g. '<2.32' in '>1<2.32') is not
    # quoted a second time inside it
    if constraint_set:
        constraint_pattern = '|'.join(re.escape(i) for i in sorted(constraint_set, key=len, reverse=True))
        tree_out_str = re.sub(constraint_pattern, lambda m: "'%s'" % m.group(0), tree_out_str)

    # drop the first character and turn ');' into ';'
    tree_out_str = tree_out_str[1:].replace(');', ';')

    # write tree to file
    with open(tree_file_out, 'w') as tree_file_out_handle:
        tree_file_out_handle.write('%s\t1\n' % len(tree_in.get_leaves()))
        tree_file_out_handle.write(tree_out_str)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
if __name__ == '__main__':

    # command-line entry point: gather the three required paths and run mcmcTC
    cli_parser = argparse.ArgumentParser()
    for flag, help_text in (('-i', 'input tree'),
                            ('-o', 'output tree'),
                            ('-tc', 'time constraint file')):
        cli_parser.add_argument(flag, required=True, help=help_text)
    mcmcTC(vars(cli_parser.parse_args()))
|
TreeSAK/mcmctree_vs_reltime.R
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
library(ggplot2)
|
|
2
|
+
library(optparse)
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
# Draw one point-range (Mean with Low/High limits) per Node, dodged and
# coloured by the ColorBy column, with flipped axes, and save it to plot_file.
#   data_file    tab-separated table with a header row; the columns Node,
#                Mean, Low, High and ColorBy are used
#   plot_width   width passed to ggsave
#   plot_height  height passed to ggsave
#   plot_file    path of the figure to write (saved at 300 dpi)
plot_grouped_HPD95 <- function(data_file, plot_width, plot_height, plot_file){

  # read strictly as tab-separated so that labels containing spaces, quotes
  # or '#' stay in one field
  dat <- read.table(data_file, header = TRUE, sep = "\t", quote = "", comment.char = "")

  hpd_plot <- ggplot(dat, aes(x = Node, y = Mean, ymin = Low, ymax = High)) +
    geom_pointrange(aes(col = factor(ColorBy)),
                    position=position_dodge(width=0.6),  # controls distance between groups
                    linewidth = 0.9,                     # line width
                    size=0.75) +                         # size of shape
    theme_bw() +                                         # remove background
    theme(panel.grid.major=element_blank(),              # remove grid
          panel.grid.minor=element_blank()) +            # remove grid
    xlab("") +                                           # x-axis label text
    ylab("95% HPD CI") +                                 # y-axis label text
    theme(axis.text.x=element_text(size=12, color='black', angle=30, hjust=1),  # x-axis label, rotated by 30 degrees
          axis.text.y=element_text(size=12, color='black'),                     # y-axis label
          legend.text=element_text(size=10)) +                                  # legend label
    scale_color_discrete(name="Color") +                                        # customize color legend, title
    guides(color=guide_legend(override.aes=list(linetype=0))) +                 # customize color legend
    coord_flip()                                                                # plot vertically

  # write to file; the plot is passed explicitly instead of relying on the last plot drawn
  ggsave(plot_file, plot = hpd_plot, width=plot_width, height=plot_height, dpi=300)
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Command-line options: input table, plot size and output file.
option_list = list(
  make_option(c("-i", "--datain"),  type="character", default=NULL, help="input data matrix"),
  make_option(c("-x", "--width"),   type="double",    default=8,    help="plot width"),
  make_option(c("-y", "--height"),  type="double",    default=5,    help="plot height"),
  make_option(c("-o", "--plotout"), type="character", default=NULL, help="output plot"));

opt_parser = OptionParser(option_list=option_list);
opt = parse_args(opt_parser);

# -i and -o default to NULL; show the help text and stop instead of failing
# later inside read.table / ggsave
if (is.null(opt$datain) || is.null(opt$plotout)){
  print_help(opt_parser)
  stop("both -i/--datain and -o/--plotout are required", call.=FALSE)
}

data_matrix_txt = opt$datain
plot_width      = opt$width
plot_height     = opt$height
output_plot     = opt$plotout

plot_grouped_HPD95(data_matrix_txt, plot_width, plot_height, output_plot)
|
TreeSAK/mcmctree_vs_reltime.py
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import argparse
|
|
3
|
+
from ete3 import Tree
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
mcmctree_vs_reltime_usage = '''
|
|
7
|
+
====================== mcmctree_vs_reltime example command ======================
|
|
8
|
+
|
|
9
|
+
TreeSAK mcmctree_vs_reltime -m mcmc.out -r reltime.txt -n nodes.txt -o ages.pdf
|
|
10
|
+
|
|
11
|
+
# Example data
|
|
12
|
+
https://github.com/songweizhi/TreeSAK/tree/master/DemoData/mcmctree_vs_reltime
|
|
13
|
+
|
|
14
|
+
=================================================================================
|
|
15
|
+
'''
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def sep_path_basename_ext(file_in):
    """Split a file path into (file name, directory, base name, extension).

    The directory is '.' when file_in has no directory part, and the
    extension is returned without its leading dot.
    """
    directory, file_name = os.path.split(file_in)
    stem, dotted_ext = os.path.splitext(file_name)
    return file_name, directory or '.', stem, dotted_ext[1:]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_lca(reltime_txt, leaf_1_name, leaf_2_name):
    """Return the id of the last common ancestor of two labelled nodes in a RelTime table.

    Args:
        reltime_txt: tab-separated table. Lines starting with 'NodeLabel' and
            lines with fewer than four fields are skipped; the first four
            fields of every other line are read as node label, node id and
            the ids of its two descendants ('-' meaning none).
        leaf_1_name: label of the first node, with spaces written as underscores.
        leaf_2_name: label of the second node, with spaces written as underscores.

    Returns:
        The id of the node closest to leaf_1_name that is also on the path
        from leaf_2_name to the top of the tree, or '' if there is none.

    Raises:
        KeyError: if either label does not occur in the table.
    """
    child_to_parent_dict = dict()
    name_to_id_dict = dict()
    with open(reltime_txt) as reltime_handle:
        for each_line in reltime_handle:
            if each_line.startswith('NodeLabel'):
                continue
            each_line_split = [i.strip() for i in each_line.strip().split('\t')]
            if len(each_line_split) < 4:
                # too short to hold label, id and both descendants
                continue
            node_name = each_line_split[0].replace(' ', '_')
            node_id, des1, des2 = each_line_split[1:4]
            name_to_id_dict[node_name] = node_id
            for each_des in (des1, des2):
                if each_des != '-':
                    child_to_parent_dict[each_des] = node_id

    def _lineage(node_id):
        # ids on the path from node_id up to the top, starting with node_id itself
        lineage_list = [node_id]
        seen = {node_id}
        while lineage_list[-1] in child_to_parent_dict:
            parent_id = child_to_parent_dict[lineage_list[-1]]
            if parent_id in seen:
                break  # malformed table with a cycle: stop instead of looping forever
            seen.add(parent_id)
            lineage_list.append(parent_id)
        return lineage_list

    leaf_1_linage = _lineage(name_to_id_dict[leaf_1_name])
    leaf_2_linage = set(_lineage(name_to_id_dict[leaf_2_name]))

    # walking upwards from the first node, the first id shared with the
    # second lineage is the last common ancestor
    for each_p in leaf_1_linage:
        if each_p in leaf_2_linage:
            return each_p

    return ''
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def parse_reltime(reltime_txt, interested_nodes_txt, op_txt):
    """Write the RelTime estimate of every requested node to a tab-separated table.

    Args:
        reltime_txt: tab-separated RelTime table. Its first line is treated
            as a header; in the other lines field 2 is read as the node id
            and fields 8, 9 and 10 as divergence time, lower and upper CI.
        interested_nodes_txt: one node per line, given as 'leaf1,leaf2',
            optionally followed by a tab and the label to show. Lines without
            a comma (blank lines, single node ids) are skipped because there
            is no leaf pair to look up.
        op_txt: output file, overwritten. It receives the header
            'ColorBy Node Mean Low High' and one 'RelTime' row per matched node.
    """
    lca_to_leaves_dict = dict()
    interested_node_desc_dict = dict()
    with open(interested_nodes_txt) as interested_nodes_handle:
        for interested_node in interested_nodes_handle:
            interested_node_split = interested_node.strip().split('\t')
            # stripped once here so the same key is used in both dictionaries
            paired_leaves = interested_node_split[0].strip()
            leaf_list = paired_leaves.split(',')
            if len(leaf_list) < 2:
                continue
            interested_node_desc = paired_leaves
            if len(interested_node_split) > 1:
                interested_node_desc = interested_node_split[1]
            interested_node_desc_dict[paired_leaves] = interested_node_desc
            lca_id = get_lca(reltime_txt, leaf_list[0], leaf_list[1])
            lca_to_leaves_dict[lca_id] = paired_leaves

    with open(op_txt, 'w') as op_txt_handle, open(reltime_txt) as reltime_handle:
        for line_num_index, each_line in enumerate(reltime_handle):
            if line_num_index == 0:
                # the header of the RelTime table is replaced by the output header
                op_txt_handle.write('ColorBy\tNode\tMean\tLow\tHigh\n')
                continue
            each_line_split = [i.strip() for i in each_line.strip().split('\t')]
            if len(each_line_split) > 1:
                node_id = each_line_split[1]
                if node_id in lca_to_leaves_dict:
                    div_time = each_line_split[7]
                    ci_lower = each_line_split[8]
                    ci_upper = each_line_split[9]
                    corresponding_leaves = lca_to_leaves_dict[node_id]
                    interested_node_desc = interested_node_desc_dict[corresponding_leaves]
                    op_txt_handle.write('RelTime\t%s\t%s\t%s\t%s\n' % (interested_node_desc, div_time, ci_lower, ci_upper))
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def mcmctree_out_to_tree_str(mamctree_out):
    """Return the FigTree species tree of an MCMCTree output file as a newick string with renamed nodes.

    Args:
        mamctree_out: path to the MCMCTree .out file.

    The tree is the line that directly follows the 'Species tree for FigTree'
    header line. Leaf names lose their first underscore-separated field and
    internal node names get a 't_n' prefix; the tree is returned in ete3
    format 8.
    """
    # The header is compared after collapsing runs of whitespace, so the exact
    # spacing used in the .out file does not matter.
    header = 'Species tree for FigTree. Branch lengths = posterior mean times; 95% CIs = labels'

    # get tree string from mamctree_out (the line right after the header line)
    tree_str = ''
    previous_was_header = False
    with open(mamctree_out) as mcmctree_out_handle:
        for each_line in mcmctree_out_handle:
            is_header = header in ' '.join(each_line.split())
            if previous_was_header and not is_header:
                tree_str = each_line.strip()
            previous_was_header = is_header

    tree_str_no_space = tree_str.replace(' ', '')

    # rename tree nodes
    t = Tree(tree_str_no_space, format=1)
    for each_node in t.traverse():
        if each_node.is_leaf():
            # drop the first underscore-separated field of the leaf name
            each_node.name = '_'.join(each_node.name.split('_')[1:])
        else:
            each_node.name = 't_n%s' % each_node.name

    return t.write(format=8)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def get_internal_node_to_plot(node_txt, mo_file):
    """Work out which tree nodes to plot and the labels to show for them.

    Args:
        node_txt: either a file with one node per line, or a comma-separated
            string of node names. In a file, field 1 is a node name or
            comma-separated leaves (resolved to the name of their last common
            ancestor in the MCMCTree tree); an optional second tab-separated
            field is the label to show.
        mo_file: MCMCTree .out file; the tree is only read if this is an
            existing file.

    Returns:
        (node_set, node_rename_dict, tree_str). node_set is a set when
        node_txt is a file and a list otherwise; tree_str is '' when mo_file
        does not exist.

    The program exits when a line lists leaves but no tree is available.
    """
    tree_str = ''
    if os.path.isfile(mo_file):
        tree_str = mcmctree_out_to_tree_str(mo_file)

    if os.path.isfile(node_txt) is False:
        # not a file: the argument itself is the comma-separated node list
        return node_txt.split(','), dict(), tree_str

    # get nodes to plot
    node_set = set()
    node_rename_dict = dict()
    parsed_tree = None  # parsed once on first use instead of once per line
    with open(node_txt) as node_txt_handle:
        for each in node_txt_handle:
            each_split = each.strip().split('\t')
            node_str = each_split[0]

            # get internal_node_to_plot
            if ',' not in node_str:
                internal_node_to_plot = node_str
            else:
                if tree_str == '':
                    print('MCMCTree out file not found, program exited!')
                    exit()
                if parsed_tree is None:
                    parsed_tree = Tree(tree_str, format=1)
                internal_node_to_plot = parsed_tree.get_common_ancestor(node_str.split(',')).name

            # add internal_node_to_plot to node_set
            if internal_node_to_plot != '':
                node_set.add(internal_node_to_plot)

            # read in name to show in plot
            if len(each_split) == 2:
                if each_split[1] != '':
                    node_rename_dict[internal_node_to_plot] = each_split[1]

    return node_set, node_rename_dict, tree_str
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def read_in_posterior_mean(mcmctree_out):
    """Read posterior means and 95% HPD limits from an MCMCTree output file.

    Args:
        mcmctree_out: path to the MCMCTree .out file.

    Returns:
        dict mapping the first field of each parsed line to
        [mean, HPD lower, HPD upper], all kept as strings. Only lines after
        the 'Posterior mean ...' header that split into exactly nine values
        are used.
    """
    # The header is compared after collapsing runs of whitespace, so the exact
    # spacing used in the .out file does not matter.
    header = 'Posterior mean (95% Equal-tail CI) (95% HPD CI) HPD-CI-width'

    node_to_mean_hpd95_dict = dict()
    header_seen = False
    with open(mcmctree_out) as mcmctree_out_handle:
        for each_line in mcmctree_out_handle:
            if header in ' '.join(each_line.split()):
                header_seen = True
                continue
            if not header_seen:
                continue

            # split on single spaces, drop empty fields and lone '(' and
            # remove the brackets and commas attached to the numbers
            each_line_split_no_empty = [
                i.replace('(', '').replace(')', '').replace(',', '')
                for i in each_line.strip().split(' ')
                if i not in ['', '(']
            ]
            if len(each_line_split_no_empty) == 9:
                node_id = each_line_split_no_empty[0]
                value_mean = each_line_split_no_empty[1]
                value_hpd95_small = each_line_split_no_empty[4]
                value_hpd95_big = each_line_split_no_empty[5]
                node_to_mean_hpd95_dict[node_id] = [value_mean, value_hpd95_small, value_hpd95_big]

    return node_to_mean_hpd95_dict
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def parse_mcmc_out(mcmc_out_file, node_txt, dm_out):
    """Append the MCMCTree estimate of every requested node to a tab-separated table.

    Args:
        mcmc_out_file: MCMCTree .out file.
        node_txt: node file or comma-separated node names, as accepted by
            get_internal_node_to_plot.
        dm_out: table to append to; each row is
            'MCMCTree', label, mean, HPD lower, HPD upper.

    Nodes without a posterior mean in mcmc_out_file are reported and skipped.
    """
    node_set, node_rename_dict, tree_str = get_internal_node_to_plot(node_txt, mcmc_out_file)
    node_to_mean_95_hpd_dict = read_in_posterior_mean(mcmc_out_file)

    with open(dm_out, 'a') as dm_out_handle:
        for each_node in node_set:
            node_name_to_write = node_rename_dict.get(each_node, each_node)
            mean_95_hpd_list = node_to_mean_95_hpd_dict.get(each_node)
            if mean_95_hpd_list is None:
                # joining None would raise TypeError and leave a half-written table
                print('No posterior mean found for node %s, skipped' % each_node)
                continue
            dm_out_handle.write('MCMCTree\t%s\t%s\n' % (node_name_to_write, '\t'.join(mean_95_hpd_list)))
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def mcmctree_vs_reltime(args):
    """Collect MCMCTree and RelTime estimates for the requested nodes and plot them with Rscript.

    Args:
        args: dict with 'm' (MCMCTree .out file), 'r' (RelTime table),
            'n' (nodes of interest) and 'o' (output pdf).

    The combined table is written next to the pdf, using the pdf path with
    its extension replaced by '.txt'. The plot is drawn by
    mcmctree_vs_reltime.R, which is looked up in the directory of this file.
    """
    import subprocess  # only needed here; keeps the module-level imports untouched

    mcmc_out_file = args['m']
    reltime_txt = args['r']
    interested_nodes_txt = args['n']
    pdf_out = args['o']

    # splitext only touches the extension of the last path component, so
    # dotted directory names and extension-less file names are handled
    dm_out_combined = os.path.splitext(pdf_out)[0] + '.txt'

    # define mcmctree_vs_reltime_R
    current_file_path = os.path.dirname(os.path.realpath(__file__))
    mcmctree_vs_reltime_R = os.path.join(current_file_path, 'mcmctree_vs_reltime.R')

    parse_reltime(reltime_txt, interested_nodes_txt, dm_out_combined)
    parse_mcmc_out(mcmc_out_file, interested_nodes_txt, dm_out_combined)

    # argument list instead of a shell string: paths with spaces or shell
    # characters reach Rscript unchanged
    plot_cmd = ['Rscript', mcmctree_vs_reltime_R, '-i', dm_out_combined, '-x', '8', '-y', '5', '-o', pdf_out]
    try:
        plot_status = subprocess.run(plot_cmd).returncode
    except OSError as run_error:
        print('Failed to run Rscript: %s' % run_error)
        return

    if plot_status == 0:
        print('Plot exported to: %s' % pdf_out)
    else:
        print('Rscript exited with status %s, plot not exported' % plot_status)
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
if __name__ == '__main__':

    # command-line entry point for the MCMCTree vs RelTime comparison
    mcmctree_vs_reltime_parser = argparse.ArgumentParser()
    mcmctree_vs_reltime_parser.add_argument('-m', required=True, help='.out file from MCMCTree')
    mcmctree_vs_reltime_parser.add_argument('-r', required=True, help='output from RelTime')
    mcmctree_vs_reltime_parser.add_argument('-n', required=True, help='interested nodes txt file')
    mcmctree_vs_reltime_parser.add_argument('-o', required=True, help='output pdf')
    args = vars(mcmctree_vs_reltime_parser.parse_args())
    mcmctree_vs_reltime(args)
|
TreeSAK/merge_pdf.py
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import glob
|
|
3
|
+
import math
|
|
4
|
+
import random
|
|
5
|
+
import argparse
|
|
6
|
+
import seaborn as sns
|
|
7
|
+
from ete3 import Tree
|
|
8
|
+
from itolapi import Itol
|
|
9
|
+
from PyPDF3.pdf import PageObject
|
|
10
|
+
from PyPDF3 import PdfFileWriter, PdfFileReader
|
|
11
|
+
from PyPDF3.generic import RectangleObject
|
|
12
|
+
# https://pypdf2.readthedocs.io/en/3.0.0/user/adding-pdf-annotations.html
|
|
13
|
+
|
|
14
|
+
def merge_pdf(pdf_1, pdf_2, margin_size, op_pdf):
    """Put the first pages of two PDF files side by side on one new page and save it.

    Args:
        pdf_1: path of the PDF whose first page goes on the left.
        pdf_2: path of the PDF whose first page goes on the right.
        margin_size: margin placed around and between the two pages.
        op_pdf: path of the PDF to write.

    The new page is as wide as both pages plus three margins and as high as
    the taller page plus two margins. The left page is placed one margin
    below the top edge, the right page one margin above the bottom edge.
    """
    # the inputs stay open until the merged page has been written
    with open(pdf_1, "rb") as pdf_1_handle, open(pdf_2, "rb") as pdf_2_handle:
        page1 = PdfFileReader(pdf_1_handle, strict=False).getPage(0)
        page2 = PdfFileReader(pdf_2_handle, strict=False).getPage(0)

        page1_width = page1.mediaBox.upperRight[0]
        page1_height = page1.mediaBox.upperRight[1]
        page2_width = page2.mediaBox.upperRight[0]
        page2_height = page2.mediaBox.upperRight[1]

        total_width = page1_width + page2_width + margin_size*3
        total_height = max([page1_height, page2_height]) + margin_size*2

        new_page = PageObject.createBlankPage(None, total_width, total_height)
        new_page.mergeTranslatedPage(page1, margin_size, (total_height-margin_size-page1_height))
        new_page.mergeTranslatedPage(page2, (page1_width + margin_size*2), margin_size)

        output = PdfFileWriter()
        output.addPage(new_page)
        with open(op_pdf, "wb") as op_pdf_handle:
            output.write(op_pdf_handle)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
if __name__ == '__main__':

    # Example run with fixed paths. Kept behind the main guard so that
    # importing this module does not try to merge files.
    merge_pdf('/Users/songweizhi/Desktop/1.pdf',
              '/Users/songweizhi/Desktop/2.pdf',
              66,
              '/Users/songweizhi/Desktop/merged.pdf')
|
TreeSAK/pRTC.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import argparse
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
pRTC_usage = '''
|
|
6
|
+
==================================== pRTC example commands ====================================
|
|
7
|
+
|
|
8
|
+
# requires: ruby (bio-nwk, colorize, parallel, csv)
|
|
9
|
+
|
|
10
|
+
TreeSAK pRTC -i out -m mcmc.txt -r rtc_dir -o after_pRTC_mcmc.txt
|
|
11
|
+
|
|
12
|
+
python3 # format of RTC file (tab separated)
|
|
13
|
+
symbiont_1,symbiont_2 host_1,host_2:0.9876
|
|
14
|
+
recipient1,recipient2 donor1,donor2:0.6789
|
|
15
|
+
|
|
16
|
+
# The above two lines can be interpreted as follows:
|
|
17
|
+
The probability of the last common ancestor (LCA) of symbiont_1 and symbiont_2 being
|
|
18
|
+
younger than that of host_1 and host_2 is 0.9876. Similarly, for a gene transfer event,
|
|
19
|
+
the LCA of the two recipients is younger than that of the two donors, the value 0.6789
|
|
20
|
+
is the PROBABILITY of the occurrence of the transfer event. Please do NOT use this module
|
|
21
|
+
if you are unsure about the interpretation of the value you provided.
|
|
22
|
+
|
|
23
|
+
# Note
|
|
24
|
+
This is a Python wrapper to perform the probabilistic RTC dating proposed by Dr. Sishuo Wang.
|
|
25
|
+
If you used it in your dating analysis, please cite:
|
|
26
|
+
https://doi.org/10.1101/2023.06.18.545440 or https://github.com/evolbeginner/rrtc.
|
|
27
|
+
|
|
28
|
+
===============================================================================================
|
|
29
|
+
'''
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def pRTC(args):
    """Run do_rrtc.rb with the given MCMCTree files and save its standard output.

    Args:
        args: dict with 'i' (the MCMCTree "out" file), 'm' (the MCMCTree
            "mcmc.txt" file), 'r' (folder holding the RTC files), 'ruby'
            (ruby executable) and 'o' (file that receives the script output).

    do_rrtc.rb is looked up in the directory of this file.
    """
    import subprocess  # only needed here; keeps the module-level imports untouched

    in_file = args['i']
    mcmc_txt = args['m']
    rrtc_dir = args['r']
    ruby_exe = args['ruby']
    op_txt = args['o']

    current_file_path = os.path.dirname(os.path.realpath(__file__))
    do_rrtc_rb = os.path.join(current_file_path, 'do_rrtc.rb')

    # argument list instead of a shell string: paths with spaces or shell
    # characters reach the script unchanged; stdout is redirected by hand
    do_rrtc_cmd = [ruby_exe, do_rrtc_rb, '--mcmctxt', mcmc_txt, '-i', in_file, '--rrtc', rrtc_dir]
    print('Running: ' + ' '.join(do_rrtc_cmd) + ' > ' + op_txt)

    with open(op_txt, 'w') as op_txt_handle:
        try:
            return_code = subprocess.run(do_rrtc_cmd, stdout=op_txt_handle).returncode
        except OSError as run_error:
            print('Failed to run %s: %s' % (ruby_exe, run_error))
            return

    if return_code != 0:
        print('%s exited with status %s' % (do_rrtc_rb, return_code))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
if __name__ == '__main__':

    # command-line entry point: four required paths plus an optional ruby executable
    cli_parser = argparse.ArgumentParser(usage=pRTC_usage)
    for flag, help_text in (('-i', 'the file "out" generated by MCMCTree'),
                            ('-m', 'the file "mcmc.txt" generated by MCMCTree'),
                            ('-r', 'the folder that contains all RTCs'),
                            ('-o', 'output txt file')):
        cli_parser.add_argument(flag, required=True, help=help_text)
    cli_parser.add_argument('-ruby', required=False, default='ruby', help='path to ruby executable file, default: ruby')
    pRTC(vars(cli_parser.parse_args()))
|