treesak 1.53.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +113 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/C60SR4.nex +127 -0
  19. TreeSAK/CompareMCMC.py +138 -0
  20. TreeSAK/ConcateMSA.py +111 -0
  21. TreeSAK/ConvertMSA.py +135 -0
  22. TreeSAK/Dir.rb +82 -0
  23. TreeSAK/ExtractMarkerSeq.py +263 -0
  24. TreeSAK/FastRoot.py +1175 -0
  25. TreeSAK/FastRoot_backup.py +1122 -0
  26. TreeSAK/FigTree.py +34 -0
  27. TreeSAK/GTDB_tree.py +76 -0
  28. TreeSAK/GeneTree.py +142 -0
  29. TreeSAK/KEGG_Luo17.py +807 -0
  30. TreeSAK/LcaToLeaves.py +66 -0
  31. TreeSAK/MarkerRef2Tree.py +616 -0
  32. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  33. TreeSAK/MarkerSeq2Tree.py +299 -0
  34. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  35. TreeSAK/ModifyTopo.py +116 -0
  36. TreeSAK/Newick_tree_plotter.py +79 -0
  37. TreeSAK/OMA.py +170 -0
  38. TreeSAK/OMA2.py +212 -0
  39. TreeSAK/OneLineAln.py +50 -0
  40. TreeSAK/PB.py +155 -0
  41. TreeSAK/PMSF.py +115 -0
  42. TreeSAK/PhyloBiAssoc.R +84 -0
  43. TreeSAK/PhyloBiAssoc.py +167 -0
  44. TreeSAK/PlotMCMC.py +41 -0
  45. TreeSAK/PlotMcmcNode.py +152 -0
  46. TreeSAK/PlotMcmcNode_old.py +252 -0
  47. TreeSAK/RootTree.py +101 -0
  48. TreeSAK/RootTreeGTDB.py +371 -0
  49. TreeSAK/RootTreeGTDB214.py +288 -0
  50. TreeSAK/RootTreeGTDB220.py +300 -0
  51. TreeSAK/SequentialDating.py +16 -0
  52. TreeSAK/SingleAleHGT.py +157 -0
  53. TreeSAK/SingleLinePhy.py +50 -0
  54. TreeSAK/SliceMSA.py +142 -0
  55. TreeSAK/SplitScore.py +21 -0
  56. TreeSAK/SplitScore1.py +177 -0
  57. TreeSAK/SplitScore1OMA.py +148 -0
  58. TreeSAK/SplitScore2.py +608 -0
  59. TreeSAK/TaxaCountStats.R +256 -0
  60. TreeSAK/TaxonTree.py +47 -0
  61. TreeSAK/TreeSAK_config.py +32 -0
  62. TreeSAK/VERSION +164 -0
  63. TreeSAK/VisHPD95.R +45 -0
  64. TreeSAK/VisHPD95.py +200 -0
  65. TreeSAK/__init__.py +0 -0
  66. TreeSAK/ale_parser.py +74 -0
  67. TreeSAK/ale_splitter.py +63 -0
  68. TreeSAK/alignment_pruner.pl +1471 -0
  69. TreeSAK/assessOG.py +45 -0
  70. TreeSAK/batch_itol.py +171 -0
  71. TreeSAK/catfasta2phy.py +140 -0
  72. TreeSAK/cogTree.py +185 -0
  73. TreeSAK/compare_trees.R +30 -0
  74. TreeSAK/compare_trees.py +255 -0
  75. TreeSAK/dating.py +264 -0
  76. TreeSAK/dating_ss.py +361 -0
  77. TreeSAK/deltall.py +82 -0
  78. TreeSAK/do_rrtc.rb +464 -0
  79. TreeSAK/fa2phy.py +42 -0
  80. TreeSAK/filter_rename_ar53.py +118 -0
  81. TreeSAK/format_leaf_name.py +70 -0
  82. TreeSAK/gap_stats.py +38 -0
  83. TreeSAK/get_SCG_tree.py +742 -0
  84. TreeSAK/get_arCOG_seq.py +97 -0
  85. TreeSAK/global_functions.py +222 -0
  86. TreeSAK/gnm_leaves.py +43 -0
  87. TreeSAK/iTOL.py +791 -0
  88. TreeSAK/iTOL_gene_tree.py +80 -0
  89. TreeSAK/itol_msa_stats.py +56 -0
  90. TreeSAK/keep_highest_rrtc.py +37 -0
  91. TreeSAK/koTree.py +194 -0
  92. TreeSAK/label_gene_tree_by_gnm.py +34 -0
  93. TreeSAK/label_tree.R +75 -0
  94. TreeSAK/label_tree.py +121 -0
  95. TreeSAK/mad.py +708 -0
  96. TreeSAK/mcmc2tree.py +58 -0
  97. TreeSAK/mcmcTC copy.py +92 -0
  98. TreeSAK/mcmcTC.py +104 -0
  99. TreeSAK/mcmctree_vs_reltime.R +44 -0
  100. TreeSAK/mcmctree_vs_reltime.py +252 -0
  101. TreeSAK/merge_pdf.py +32 -0
  102. TreeSAK/pRTC.py +56 -0
  103. TreeSAK/parse_mcmctree.py +198 -0
  104. TreeSAK/parse_reltime.py +141 -0
  105. TreeSAK/phy2fa.py +37 -0
  106. TreeSAK/plot_distruibution_th.py +165 -0
  107. TreeSAK/prep_mcmctree_ctl.py +92 -0
  108. TreeSAK/print_leaves.py +32 -0
  109. TreeSAK/pruneMSA.py +63 -0
  110. TreeSAK/recode.py +73 -0
  111. TreeSAK/remove_bias.R +112 -0
  112. TreeSAK/rename_leaves.py +78 -0
  113. TreeSAK/replace_clade.py +55 -0
  114. TreeSAK/root_with_out_group.py +84 -0
  115. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  116. TreeSAK/subsample_drep_gnms.py +74 -0
  117. TreeSAK/subset.py +69 -0
  118. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  119. TreeSAK/supertree.py +330 -0
  120. TreeSAK/tmp_1.py +19 -0
  121. TreeSAK/tmp_2.py +19 -0
  122. TreeSAK/tmp_3.py +120 -0
  123. TreeSAK/tmp_4.py +43 -0
  124. TreeSAK/tmp_5.py +12 -0
  125. TreeSAK/weighted_rand.rb +23 -0
  126. treesak-1.53.3.data/scripts/TreeSAK +955 -0
  127. treesak-1.53.3.dist-info/LICENSE +674 -0
  128. treesak-1.53.3.dist-info/METADATA +27 -0
  129. treesak-1.53.3.dist-info/RECORD +131 -0
  130. treesak-1.53.3.dist-info/WHEEL +5 -0
  131. treesak-1.53.3.dist-info/top_level.txt +1 -0
TreeSAK/mcmc2tree.py ADDED
@@ -0,0 +1,58 @@
1
+ import os
2
+ import argparse
3
+ from ete3 import Tree
4
+
5
+
6
+ mcmc2tree_usage = '''
7
+ ============ mcmc2tree example commands ============
8
+
9
+ TreeSAK mcmc2tree -i mcmctree.out -o renamed.tree
10
+
11
+ ====================================================
12
+ '''
13
+
14
+
15
def mcmc2tree(args):
    """Extract the FigTree species tree from an MCMCTree output file and write it with renamed nodes.

    Args:
        args: dict with two keys:
            'i': path to the .out file produced by MCMCTree.
            'o': path of the tree file to write.

    The tree is read from the line that follows the "Species tree for FigTree"
    header. Leaf names lose their first underscore-separated token and internal
    node names get the prefix 't_n'. If the input file does not exist, a
    message is printed and the program exits.
    """
    mamctree_out = args['i']
    tree_file = args['o']

    if not os.path.isfile(mamctree_out):
        print('%s not found, program exited!' % mamctree_out)
        # The message announces an exit; without it the open() below would
        # fail with a traceback instead.
        exit()

    # get tree string from mcmctree_out: it is the line right after the header
    tree_str = ''
    header_seen = False
    with open(mamctree_out) as mamctree_out_handle:
        for each_line in mamctree_out_handle:
            if 'Species tree for FigTree.  Branch lengths = posterior mean times; 95% CIs = labels' in each_line:
                header_seen = True
            elif 'Species tree for FigTree. Branch lengths = posterior mean times; 95% CIs = labels' in each_line:
                header_seen = True
            elif header_seen:
                tree_str = each_line.strip()
                header_seen = False

    tree_str_no_space = tree_str.replace(' ', '')

    # rename tree nodes
    t = Tree(tree_str_no_space, format=1)
    for each_node in t.traverse():
        if each_node.is_leaf():
            # drop the first underscore-separated token of the leaf name
            # (presumably an index added by MCMCTree - confirm)
            node_name_new = '_'.join(each_node.name.split('_')[1:])
        else:
            node_name_new = 't_n%s' % each_node.name
        each_node.name = node_name_new

    tree_str_renamed = t.write(format=8)

    with open(tree_file, 'w') as tree_file_handle:
        tree_file_handle.write(tree_str_renamed + '\n')
50
+
51
+
52
if __name__ == '__main__':

    # Script entry point: both options are mandatory; the parsed namespace is
    # handed to mcmc2tree() as a plain dict keyed by option name.
    mcmc2tree_parser = argparse.ArgumentParser()
    for option_flag, option_help in (('-i', 'the .out file from mcmctree'),
                                     ('-o', 'output tree file')):
        mcmc2tree_parser.add_argument(option_flag, required=True, help=option_help)
    args = vars(mcmc2tree_parser.parse_args())
    mcmc2tree(args)
@@ -0,0 +1,92 @@
1
+ import argparse
2
+ from ete3 import Tree
3
+
4
+
5
+ mcmcTC_usage = '''
6
+ ===================== mcmcTC example commands =====================
7
+
8
+ TreeSAK mcmcTC -i in.tree -o out.tree -tc time_constraints.txt
9
+
10
+ # Format of constraint file (tab separated columns)
11
+ IMG2264867070_yang,GCF900696045_1_yang 3.46-4.38 Archaeal root
12
+ GCF000015225_1_yang,GCF000007225_1_yang -2.32 Oxygen Age Constraint, Thermoproteales
13
+ GCF000213215_1_yang,GCA000024305_1_yang -2.32 Oxygen Age Constraint, Sulfolobales
14
+ GCF000152265_2_yang,GCF000195915_1_yang -2.32 Oxygen Age Constraint, Thermoplasma
15
+ GCF000376445_1_yang,GCF000172995_2_yang -1.579 Chitin Age Constraint, Halobacteriales
16
+ GCF000195935_2_yang,GCF000151105_2_yang -1.579 Chitin Age Constraint, Thermococcales
17
+ GCA000802205_2_yang,GCA000200715_1 0.75-1.49 HGT from Viridiplantae to Thaumarchaeota
18
+
19
+ ===================================================================
20
+ '''
21
+
22
+
23
def mcmcTC(args):
    """Add time constraints to a tree as node names and write the labelled tree.

    Args:
        args: dict with three keys:
            'i':  input tree file.
            'tc': constraint file, tab-separated; column 1 holds
                  comma-separated leaf ids, column 2 the age.
            'o':  output tree file.

    Age formats: '-X' becomes '<X', 'X-' becomes '>X' and 'X-Y' becomes
    '>X<Y'. Each constraint is set as the name of the common ancestor of the
    listed leaves. If any age has another format, the offending values are
    printed and the program exits before the tree is read.

    The output file starts with a line holding the number of leaves and the
    number 1 (tab-separated), followed by the tree string.
    """
    import re  # local import so the module's import block stays untouched

    tree_file_in = args['i']
    time_constraint_txt = args['tc']
    tree_file_out = args['o']

    constraint_set = set()
    constraint_dict = dict()
    not_recognizable_time_constraint_set = set()
    with open(time_constraint_txt) as time_constraint_handle:
        for each_constraint in time_constraint_handle:
            if each_constraint.strip() == '':
                continue  # blank lines carry no constraint
            each_constraint_split = each_constraint.strip().split('\t')
            leaf_ids = each_constraint_split[0]
            provided_age = each_constraint_split[1]

            if provided_age.startswith('-'):
                str_to_add = '<%s' % provided_age[1:]
            elif provided_age.endswith('-'):
                str_to_add = '>%s' % provided_age[:-1]
            elif '-' in provided_age:
                provided_age_split = provided_age.split('-')
                str_to_add = '>%s<%s' % (provided_age_split[0], provided_age_split[1])
            else:
                not_recognizable_time_constraint_set.add(provided_age)
                continue

            constraint_dict[leaf_ids] = str_to_add
            constraint_set.add(str_to_add)

    if len(not_recognizable_time_constraint_set) > 0:
        print('Format of the following time constraints are not recognizable, program exited')
        print(','.join(not_recognizable_time_constraint_set))
        exit()

    # read in tree
    tree_in = Tree(tree_file_in, quoted_node_names=True, format=1)

    # add time constraints as node name
    for each_node in constraint_dict:
        node_age = constraint_dict[each_node]
        node_split = each_node.split(',')
        current_lca = tree_in.get_common_ancestor(node_split)
        current_lca.add_features(custom_label=node_age)
        current_lca.name = node_age

    tree_out_str = tree_in.write(format=1)

    # remove branch length of 1; the lookahead keeps lengths that merely start
    # with 1 (':1.5', ':12', ':1e-05') intact
    tree_out_str = re.sub(r':1(?![0-9.eE])', '', tree_out_str)

    # quote constraint strings in a single pass, longest first, so a constraint
    # that is a substring of another one (e.g. '<2' inside '>1<2') is not
    # quoted a second time
    if constraint_set:
        constraint_pattern = '|'.join(
            re.escape(i) for i in sorted(constraint_set, key=len, reverse=True))
        tree_out_str = re.sub(constraint_pattern,
                              lambda matched: "'%s'" % matched.group(0),
                              tree_out_str)

    # remove the most outside parenthesis
    tree_out_str = tree_out_str[1:].replace(');', ';')

    # write tree to file
    with open(tree_file_out, 'w') as tree_file_out_handle:
        tree_file_out_handle.write('%s\t1\n' % len(tree_in.get_leaves()))
        tree_file_out_handle.write(tree_out_str)
83
+
84
+
85
if __name__ == '__main__':

    # Script entry point: all three options are mandatory; the parsed
    # namespace is handed to mcmcTC() as a plain dict keyed by option name.
    mcmcTC_parser = argparse.ArgumentParser()
    for option_flag, option_help in (('-i', 'input tree'),
                                     ('-o', 'output tree'),
                                     ('-tc', 'time constraint file')):
        mcmcTC_parser.add_argument(option_flag, required=True, help=option_help)
    args = vars(mcmcTC_parser.parse_args())
    mcmcTC(args)
TreeSAK/mcmcTC.py ADDED
@@ -0,0 +1,104 @@
1
+ import argparse
2
+ from ete3 import Tree
3
+
4
+
5
+ mcmcTC_usage = '''
6
+ ======================================= mcmcTC example commands =======================================
7
+
8
+ TreeSAK mcmcTC -i in.tree -o out.tree -tc time_constraints.txt
9
+
10
+ # Format of constraint file (tab separated columns)
11
+ IMG2264867070_yang,GCF900696045_1_yang lca 3.46-4.38 Archaeal root
12
+ GCF000015225_1_yang,GCF000007225_1_yang lca -2.32 Oxygen Age Constraint, Thermoproteales
13
+ GCF000213215_1_yang,GCA000024305_1_yang lca -2.32 Oxygen Age Constraint, Sulfolobales
14
+ GCF000152265_2_yang,GCF000195915_1_yang lca -2.32 Oxygen Age Constraint, Thermoplasma
15
+ GCF000376445_1_yang,GCF000172995_2_yang lca -1.579 Chitin Age Constraint, Halobacteriales
16
+ GCF000195935_2_yang,GCF000151105_2_yang lca -1.579 Chitin Age Constraint, Thermococcales
17
+ GCA000802205_2_yang,GCF000303155_1_yang lca_p 0.75-1.49 HGT from Viridiplantae to Thaumarchaeota
18
+
19
+ # lca: last common ancestor
20
+ # lca_p: parent of last common ancestor
21
+
22
+ =======================================================================================================
23
+ '''
24
+
25
+
26
def mcmcTC(args):
    """Add time constraints to a tree as node names and write the labelled tree.

    Args:
        args: dict with three keys:
            'i':  input tree file.
            'tc': constraint file, tab-separated; column 1 holds
                  comma-separated leaf ids, column 2 the constraint type
                  ('lca' or 'lca_p'), column 3 the age.
            'o':  output tree file.

    Age formats: '-X' becomes '<X', 'X-' becomes '>X' and 'X-Y' becomes
    '>X<Y'. With type 'lca' the constraint names the common ancestor of the
    listed leaves; with 'lca_p' it names that ancestor's parent. If any age has
    another format, the offending values are printed and the program exits
    before the tree is read.

    The output file starts with a line holding the number of leaves and the
    number 1 (tab-separated), followed by the tree string.
    """
    import re  # local import so the module's import block stays untouched

    tree_file_in = args['i']
    time_constraint_txt = args['tc']
    tree_file_out = args['o']

    constraint_set = set()
    constraint_dict = dict()
    constraint_type_dict = dict()
    not_recognizable_time_constraint_set = set()
    with open(time_constraint_txt) as time_constraint_handle:
        for each_constraint in time_constraint_handle:
            if each_constraint.strip() == '':
                continue  # blank lines carry no constraint
            each_constraint_split = each_constraint.strip().split('\t')
            leaf_ids = each_constraint_split[0]
            constraint_type = each_constraint_split[1]
            provided_age = each_constraint_split[2]

            if provided_age.startswith('-'):
                str_to_add = '<%s' % provided_age[1:]
            elif provided_age.endswith('-'):
                str_to_add = '>%s' % provided_age[:-1]
            elif '-' in provided_age:
                provided_age_split = provided_age.split('-')
                str_to_add = '>%s<%s' % (provided_age_split[0], provided_age_split[1])
            else:
                not_recognizable_time_constraint_set.add(provided_age)
                continue

            constraint_set.add(str_to_add)
            constraint_dict[leaf_ids] = str_to_add
            constraint_type_dict[leaf_ids] = constraint_type

    if len(not_recognizable_time_constraint_set) > 0:
        print('Format of the following time constraints are not recognizable, program exited')
        print(','.join(not_recognizable_time_constraint_set))
        exit()

    # read in tree
    tree_in = Tree(tree_file_in, quoted_node_names=True, format=1)

    # add time constraints as node name
    for each_node in constraint_dict:
        node_age = constraint_dict[each_node]
        node_split = each_node.split(',')
        current_lca = tree_in.get_common_ancestor(node_split)
        constraint_type = constraint_type_dict[each_node]
        if constraint_type == 'lca':
            node_to_label = current_lca
        elif constraint_type == 'lca_p':
            node_to_label = current_lca.up
            if node_to_label is None:
                # the common ancestor is the root, so there is no parent to label
                print('The last common ancestor of %s is the root and has no parent, program exited' % each_node)
                exit()
        else:
            # previously dropped without notice; still skipped, but reported
            print('Constraint type "%s" of %s is neither lca nor lca_p, constraint ignored' % (constraint_type, each_node))
            continue
        node_to_label.add_features(custom_label=node_age)
        node_to_label.name = node_age

    tree_out_str = tree_in.write(format=1)

    # remove branch length of 1; the lookahead keeps lengths that merely start
    # with 1 (':1.5', ':12', ':1e-05') intact
    tree_out_str = re.sub(r':1(?![0-9.eE])', '', tree_out_str)

    # quote constraint strings in a single pass, longest first, so a constraint
    # that is a substring of another one (e.g. '<2' inside '>1<2') is not
    # quoted a second time
    if constraint_set:
        constraint_pattern = '|'.join(
            re.escape(i) for i in sorted(constraint_set, key=len, reverse=True))
        tree_out_str = re.sub(constraint_pattern,
                              lambda matched: "'%s'" % matched.group(0),
                              tree_out_str)

    # remove the most outside parenthesis
    tree_out_str = tree_out_str[1:].replace(');', ';')

    # write tree to file
    with open(tree_file_out, 'w') as tree_file_out_handle:
        tree_file_out_handle.write('%s\t1\n' % len(tree_in.get_leaves()))
        tree_file_out_handle.write(tree_out_str)
95
+
96
+
97
if __name__ == '__main__':

    # Script entry point: all three options are mandatory; the parsed
    # namespace is handed to mcmcTC() as a plain dict keyed by option name.
    mcmcTC_parser = argparse.ArgumentParser()
    for option_flag, option_help in (('-i', 'input tree'),
                                     ('-o', 'output tree'),
                                     ('-tc', 'time constraint file')):
        mcmcTC_parser.add_argument(option_flag, required=True, help=option_help)
    args = vars(mcmcTC_parser.parse_args())
    mcmcTC(args)
@@ -0,0 +1,44 @@
1
+ library(ggplot2)
2
+ library(optparse)
3
+
4
+
5
# Plot, for every node, the mean and the Low-High interval as a point-range,
# one colour per ColorBy group, and save the figure.
#   data_file    table with a header line; columns used: Node, Mean, Low, High, ColorBy
#   plot_width   width passed to ggsave
#   plot_height  height passed to ggsave
#   plot_file    output file passed to ggsave
plot_grouped_HPD95 <- function(data_file, plot_width, plot_height, plot_file){

  # Node labels may contain spaces, so split on tabs whenever the header line
  # holds one; otherwise keep read.table's default whitespace splitting.
  header_line <- readLines(data_file, n = 1)
  field_sep <- if (isTRUE(grepl("\t", header_line, fixed = TRUE))) "\t" else ""
  dat <- read.table(data_file, header = TRUE, sep = field_sep)

  hpd_plot <- ggplot(dat, aes(x = Node, y = Mean, ymin = Low, ymax = High)) +
    geom_pointrange(aes(col = factor(ColorBy)),
                    position=position_dodge(width=0.6),  # controls distance between groups
                    linewidth = 0.9,                     # line width
                    size=0.75) +                         # size of shape
    theme_bw() +                                         # remove background
    theme(panel.grid.major=element_blank(),              # remove grid
          panel.grid.minor=element_blank()) +            # remove grid
    xlab("") +                                           # x-axis label text
    ylab("95% HPD CI") +                                 # y-axis label text
    theme(axis.text.x=element_text(size=12, color='black', angle=30, hjust=1),  # x-axis label, rotated by 30 degrees
          axis.text.y=element_text(size=12, color='black'),                     # y-axis label
          legend.text=element_text(size=10)) +                                  # legend label
    scale_color_discrete(name="Color") +                                        # customize color legend, title
    guides(color=guide_legend(override.aes=list(linetype=0))) +                 # customize color legend
    coord_flip()                                                                # plot vertically

  # write to file; the plot is passed explicitly instead of relying on the
  # most recently created plot
  ggsave(plot_file, plot = hpd_plot, width=plot_width, height=plot_height, dpi=300)
}
29
+
30
+
31
# Command-line interface: input table, plot size (defaults 8 x 5) and output file.
option_list <- list(
  make_option(c("-i", "--datain"),  type = "character", default = NULL, help = "input data matrix"),
  make_option(c("-x", "--width"),   type = "double",    default = 8,    help = "plot width"),
  make_option(c("-y", "--height"),  type = "double",    default = 5,    help = "plot height"),
  make_option(c("-o", "--plotout"), type = "character", default = NULL, help = "output plot")
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

data_matrix_txt <- opt$datain
plot_width      <- opt$width
plot_height     <- opt$height
output_plot     <- opt$plotout

# draw the plot with the parsed settings
plot_grouped_HPD95(data_matrix_txt, plot_width, plot_height, output_plot)
@@ -0,0 +1,252 @@
1
+ import os
2
+ import argparse
3
+ from ete3 import Tree
4
+
5
+
6
+ mcmctree_vs_reltime_usage = '''
7
+ ====================== mcmctree_vs_reltime example command ======================
8
+
9
+ TreeSAK mcmctree_vs_reltime -m mcmc.out -r reltime.txt -n nodes.txt -o ages.pdf
10
+
11
+ # Example data
12
+ https://github.com/songweizhi/TreeSAK/tree/master/DemoData/mcmctree_vs_reltime
13
+
14
+ =================================================================================
15
+ '''
16
+
17
+
18
def sep_path_basename_ext(file_in):
    """Split a file path into (file name, directory, base name, extension).

    The directory is '.' when the path has no directory part, and the
    extension is returned without its leading dot.
    """
    directory, file_name = os.path.split(file_in)
    base_name, dotted_ext = os.path.splitext(file_name)
    return file_name, (directory or '.'), base_name, dotted_ext[1:]
26
+
27
+
28
def get_lca(reltime_txt, leaf_1_name, leaf_2_name):
    """Return the id of the last common ancestor of two nodes in a RelTime table.

    Args:
        reltime_txt: tab-separated table; the line starting with 'NodeLabel'
            is skipped. Columns used: 1 = node label, 2 = node id,
            3 and 4 = ids of the two descendants ('-' when there is none).
        leaf_1_name: label of the first node, with spaces written as '_'.
        leaf_2_name: label of the second node, with spaces written as '_'.

    Returns:
        The node id (str) of the deepest node found in both lineages, or ''
        if the two lineages share no node.

    Raises:
        KeyError: if either name is not a node label in the table.
    """
    child_to_parent_dict = dict()
    name_to_id_dict = dict()
    with open(reltime_txt) as reltime_handle:
        for each_line in reltime_handle:
            if each_line.startswith('NodeLabel'):
                continue
            each_line_split = [i.strip() for i in each_line.strip().split('\t')]
            if len(each_line_split) > 1:
                node_name = each_line_split[0].replace(' ', '_')
                node_id = each_line_split[1]
                des1 = each_line_split[2]
                des2 = each_line_split[3]
                name_to_id_dict[node_name] = node_id
                for each_des in (des1, des2):
                    # '-' marks a missing descendant, it is not a node id
                    if each_des != '-':
                        child_to_parent_dict[each_des] = node_id

    def lineage_of(node_id):
        # path from the node up to the topmost ancestor, the node itself first
        lineage_list = [node_id]
        while node_id in child_to_parent_dict:
            node_id = child_to_parent_dict[node_id]
            lineage_list.append(node_id)
        return lineage_list

    # only the two requested lineages are needed
    leaf_1_linage = lineage_of(name_to_id_dict[leaf_1_name])
    leaf_2_linage_set = set(lineage_of(name_to_id_dict[leaf_2_name]))

    # walking upwards, the first node shared by both lineages is the LCA
    for each_p in leaf_1_linage:
        if each_p in leaf_2_linage_set:
            return each_p

    return ''
71
+
72
+
73
def parse_reltime(reltime_txt, interested_nodes_txt, op_txt):
    """Write the RelTime estimates of the nodes of interest to a plot table.

    Args:
        reltime_txt: tab-separated RelTime table; column 2 is the node id and
            columns 8, 9 and 10 are written out as Mean, Low and High.
        interested_nodes_txt: one node per line, given as two comma-separated
            leaf names, optionally followed by a tab and a display name.
        op_txt: output table (overwritten). It gets the header
            ColorBy/Node/Mean/Low/High and one 'RelTime' row per matched node.
    """
    lca_to_leaves_dict = dict()
    interested_node_desc_dict = dict()
    with open(interested_nodes_txt) as interested_nodes_handle:
        for interested_node in interested_nodes_handle:
            interested_node_split = interested_node.strip().split('\t')
            # stripped once, so the same key is used in both dictionaries
            paired_leaves = interested_node_split[0].strip()
            if paired_leaves == '':
                continue  # blank line
            leaf_names = [i.strip() for i in paired_leaves.split(',')]
            if len(leaf_names) < 2:
                # a single name cannot be resolved to a common ancestor here
                print('%s is not a pair of leaves, ignored for RelTime' % paired_leaves)
                continue
            interested_node_desc = paired_leaves
            if len(interested_node_split) > 1:
                interested_node_desc = interested_node_split[1]
            interested_node_desc_dict[paired_leaves] = interested_node_desc
            lca_id = get_lca(reltime_txt, leaf_names[0], leaf_names[1])
            lca_to_leaves_dict[lca_id] = paired_leaves

    with open(op_txt, 'w') as op_txt_handle, open(reltime_txt) as reltime_handle:
        for line_num_index, each_line in enumerate(reltime_handle):
            if line_num_index == 0:
                # the first line of the RelTime table is replaced by the plot header
                op_txt_handle.write('ColorBy\tNode\tMean\tLow\tHigh\n')
                continue
            each_line_split = [i.strip() for i in each_line.strip().split('\t')]
            if len(each_line_split) > 1:
                node_id = each_line_split[1]
                if node_id in lca_to_leaves_dict:
                    div_time = each_line_split[7]
                    ci_lower = each_line_split[8]
                    ci_upper = each_line_split[9]
                    corresponding_leaves = lca_to_leaves_dict[node_id]
                    interested_node_desc = interested_node_desc_dict[corresponding_leaves]
                    op_txt_handle.write('RelTime\t%s\t%s\t%s\t%s\n' % (interested_node_desc, div_time, ci_lower, ci_upper))
109
+
110
+
111
def mcmctree_out_to_tree_str(mamctree_out):
    """Return the FigTree species tree of an MCMCTree output file with renamed nodes.

    Args:
        mamctree_out: path to the .out file produced by MCMCTree.

    Returns:
        The tree string written by ete3 with format=8. Leaf names lose their
        first underscore-separated token and internal node names get the
        prefix 't_n'.
    """
    # get tree string from mamctree_out: it is the line right after the header
    tree_str = ''
    header_seen = False
    with open(mamctree_out) as mamctree_out_handle:
        for each_line in mamctree_out_handle:
            if 'Species tree for FigTree.  Branch lengths = posterior mean times; 95% CIs = labels' in each_line:
                header_seen = True
            elif 'Species tree for FigTree. Branch lengths = posterior mean times; 95% CIs = labels' in each_line:
                header_seen = True
            elif header_seen:
                tree_str = each_line.strip()
                header_seen = False

    tree_str_no_space = tree_str.replace(' ', '')

    # rename tree nodes
    t = Tree(tree_str_no_space, format=1)
    for each_node in t.traverse():
        if each_node.is_leaf():
            # drop the first underscore-separated token of the leaf name
            # (presumably an index added by MCMCTree - confirm)
            node_name_new = '_'.join(each_node.name.split('_')[1:])
        else:
            node_name_new = 't_n%s' % each_node.name
        each_node.name = node_name_new

    tree_str_renamed = t.write(format=8)

    return tree_str_renamed
138
+
139
+
140
def get_internal_node_to_plot(node_txt, mo_file):
    """Work out which MCMCTree nodes to plot and how to label them.

    Args:
        node_txt: either a file with one node per line, or a comma-separated
            string of node names. In the file, column 1 is a node name or a
            comma-separated list of leaves (resolved to the name of their
            common ancestor); an optional second tab-separated column is the
            display name.
        mo_file: MCMCTree .out file; needed only to resolve leaf lists.

    Returns:
        (node_set, node_rename_dict, tree_str): the nodes to plot (a set when
        node_txt is a file, otherwise the list obtained by splitting node_txt
        on commas), a dict from node name to display name, and the renamed
        tree string ('' when mo_file does not exist).

    A leaf list without an existing mo_file prints a message and exits.
    """
    tree_str = ''
    if os.path.isfile(mo_file):
        tree_str = mcmctree_out_to_tree_str(mo_file)

    # get nodes to plot
    node_set = set()
    node_rename_dict = dict()
    if os.path.isfile(node_txt):
        mcmc_tree = None  # parsed once, on the first leaf list
        with open(node_txt) as node_txt_handle:
            for each in node_txt_handle:
                each_split = each.strip().split('\t')
                node_str = each_split[0]

                # get internal_node_to_plot
                if ',' not in node_str:
                    internal_node_to_plot = node_str
                else:
                    if tree_str == '':
                        print('MCMCTree out file not found, program exited!')
                        exit()
                    if mcmc_tree is None:
                        mcmc_tree = Tree(tree_str, format=1)
                    current_lca = mcmc_tree.get_common_ancestor(node_str.split(','))
                    internal_node_to_plot = current_lca.name

                # add internal_node_to_plot to node_set
                if internal_node_to_plot != '':
                    node_set.add(internal_node_to_plot)

                # read in name to show in plot; extra columns are tolerated,
                # as parse_reltime does for the same file
                if (len(each_split) > 1) and (each_split[1] != ''):
                    node_rename_dict[internal_node_to_plot] = each_split[1]
    else:
        node_set = node_txt.split(',')

    return node_set, node_rename_dict, tree_str
178
+
179
+
180
def read_in_posterior_mean(mcmctree_out):
    """Read posterior means and 95% HPD bounds from an MCMCTree output file.

    Args:
        mcmctree_out: path to the .out file produced by MCMCTree.

    Returns:
        dict mapping node id to [mean, HPD lower bound, HPD upper bound], all
        kept as strings. Only lines after the 'Posterior mean ...' header that
        split into exactly nine values are used.
    """
    # read in Posterior mean
    node_to_mean_hpd95_dict = dict()
    header_seen = False
    with open(mcmctree_out) as mcmctree_out_handle:
        for each_line in mcmctree_out_handle:
            if 'Posterior mean (95% Equal-tail CI) (95% HPD CI) HPD-CI-width' in each_line:
                header_seen = True
                continue
            if not header_seen:
                continue

            # split on single spaces, drop the empty pieces and lone '(' left
            # by the column padding, then strip brackets and commas
            each_line_split_no_empty = []
            for each_element in each_line.strip().split(' '):
                if each_element not in ['', '(']:
                    each_element_value = each_element.replace('(', '').replace(')', '').replace(',', '')
                    each_line_split_no_empty.append(each_element_value)

            if len(each_line_split_no_empty) == 9:
                node_id = each_line_split_no_empty[0]
                value_mean = each_line_split_no_empty[1]
                value_hpd95_small = each_line_split_no_empty[4]
                value_hpd95_big = each_line_split_no_empty[5]
                node_to_mean_hpd95_dict[node_id] = [value_mean, value_hpd95_small, value_hpd95_big]

    return node_to_mean_hpd95_dict
207
+
208
+
209
def parse_mcmc_out(mcmc_out_file, node_txt, dm_out):
    """Append the MCMCTree estimates of the nodes of interest to a plot table.

    Args:
        mcmc_out_file: .out file produced by MCMCTree.
        node_txt: nodes of interest, as accepted by get_internal_node_to_plot.
        dm_out: table to append to; one 'MCMCTree' row (name, mean, HPD lower,
            HPD upper) is added per node.

    Nodes without an entry in the posterior mean table are reported and skipped.
    """
    node_set, node_rename_dict, _ = get_internal_node_to_plot(node_txt, mcmc_out_file)
    node_to_mean_95_hpd_dict = read_in_posterior_mean(mcmc_out_file)

    # append mode: rows are added to whatever the table already holds
    with open(dm_out, 'a') as dm_out_handle:
        for each_node in node_set:
            mean_95_hpd_list = node_to_mean_95_hpd_dict.get(each_node)
            if mean_95_hpd_list is None:
                # joining None would raise a TypeError
                print('%s not found in the posterior mean table of %s, ignored' % (each_node, mcmc_out_file))
                continue
            node_name_to_write = node_rename_dict.get(each_node, each_node)
            dm_out_handle.write('MCMCTree\t%s\t%s\n' % (node_name_to_write, '\t'.join(mean_95_hpd_list)))
220
+
221
+
222
def mcmctree_vs_reltime(args):
    """Plot MCMCTree against RelTime age estimates for the nodes of interest.

    Args:
        args: dict with four keys:
            'm': .out file from MCMCTree.
            'r': RelTime output table.
            'n': file listing the nodes of interest.
            'o': output plot file.

    The combined table is written next to the plot, with the plot's extension
    replaced by '.txt', and is then plotted by mcmctree_vs_reltime.R (expected
    in the directory of this file) at a size of 8 x 5.
    """
    import subprocess  # local import so the module's import block stays untouched

    mcmc_out_file = args['m']
    reltime_txt = args['r']
    interested_nodes_txt = args['n']
    pdf_out = args['o']

    # splitext also copes with output names that have no extension
    dm_out_combined = os.path.splitext(pdf_out)[0] + '.txt'

    # define mcmctree_vs_reltime_R
    current_file_path = os.path.dirname(os.path.realpath(__file__))
    mcmctree_vs_reltime_R = os.path.join(current_file_path, 'mcmctree_vs_reltime.R')

    parse_reltime(reltime_txt, interested_nodes_txt, dm_out_combined)
    parse_mcmc_out(mcmc_out_file, interested_nodes_txt, dm_out_combined)

    # an argument list (no shell) keeps paths with spaces intact
    plot_cmd = ['Rscript', mcmctree_vs_reltime_R, '-i', dm_out_combined, '-x', '8', '-y', '5', '-o', pdf_out]
    try:
        plot_exit_code = subprocess.call(plot_cmd)
    except OSError as plot_error:
        print('Failed to run Rscript: %s' % plot_error)
        return

    if plot_exit_code == 0:
        print('Plot exported to: %s' % pdf_out)
    else:
        print('Rscript exited with code %s, plot not exported' % plot_exit_code)
242
+
243
+
244
if __name__ == '__main__':

    # Script entry point: all four options are mandatory; the parsed namespace
    # is handed to mcmctree_vs_reltime() as a plain dict keyed by option name.
    mcmctree_vs_reltime_parser = argparse.ArgumentParser()
    mcmctree_vs_reltime_parser.add_argument('-m', required=True, help='.out file from MCMCTree')
    mcmctree_vs_reltime_parser.add_argument('-r', required=True, help='output from RelTime')
    mcmctree_vs_reltime_parser.add_argument('-n', required=True, help='interested nodes txt file')
    mcmctree_vs_reltime_parser.add_argument('-o', required=True, help='output pdf')
    args = vars(mcmctree_vs_reltime_parser.parse_args())
    mcmctree_vs_reltime(args)
TreeSAK/merge_pdf.py ADDED
@@ -0,0 +1,32 @@
1
+ import os
2
+ import glob
3
+ import math
4
+ import random
5
+ import argparse
6
+ import seaborn as sns
7
+ from ete3 import Tree
8
+ from itolapi import Itol
9
+ from PyPDF3.pdf import PageObject
10
+ from PyPDF3 import PdfFileWriter, PdfFileReader
11
+ from PyPDF3.generic import RectangleObject
12
+ # https://pypdf2.readthedocs.io/en/3.0.0/user/adding-pdf-annotations.html
13
+
14
def merge_pdf(pdf_1, pdf_2, margin_size, op_pdf):
    """Put the first pages of two PDF files side by side on one new page.

    Args:
        pdf_1: path of the PDF whose first page goes on the left.
        pdf_2: path of the PDF whose first page goes on the right.
        margin_size: gap left around and between the two pages.
        op_pdf: path of the single-page PDF to write.

    The new page is as wide as both pages plus three margins and as high as
    the taller page plus two margins. The left page is aligned to the top
    margin, the right page to the bottom margin.
    """
    # The inputs stay open until the output is written (assumed necessary
    # because page content may be read lazily - confirm), then all three
    # handles are closed.
    with open(pdf_1, "rb") as pdf_1_handle, open(pdf_2, "rb") as pdf_2_handle:
        page1 = PdfFileReader(pdf_1_handle, strict=False).getPage(0)
        page2 = PdfFileReader(pdf_2_handle, strict=False).getPage(0)

        page1_width, page1_height = page1.mediaBox.upperRight[0], page1.mediaBox.upperRight[1]
        page2_width, page2_height = page2.mediaBox.upperRight[0], page2.mediaBox.upperRight[1]

        total_width = page1_width + page2_width + margin_size*3
        total_height = max([page1_height, page2_height]) + margin_size*2
        new_page = PageObject.createBlankPage(None, total_width, total_height)
        new_page.mergeTranslatedPage(page1, margin_size, (total_height-margin_size-page1_height))
        new_page.mergeTranslatedPage(page2, (page1_width + margin_size*2), margin_size)

        output = PdfFileWriter()
        output.addPage(new_page)
        with open(op_pdf, "wb") as op_pdf_handle:
            output.write(op_pdf_handle)
27
+
28
+
29
if __name__ == '__main__':

    # Example run with fixed local paths. Guarded so that importing this
    # module no longer tries to merge files that exist only on one machine.
    merge_pdf('/Users/songweizhi/Desktop/1.pdf',
              '/Users/songweizhi/Desktop/2.pdf',
              66,
              '/Users/songweizhi/Desktop/merged.pdf')
TreeSAK/pRTC.py ADDED
@@ -0,0 +1,56 @@
1
+ import os
2
+ import argparse
3
+
4
+
5
+ pRTC_usage = '''
6
+ ==================================== pRTC example commands ====================================
7
+
8
+ # requires: ruby (bio-nwk, colorize, parallel, csv)
9
+
10
+ TreeSAK pRTC -i out -m mcmc.txt -r rtc_dir -o after_pRTC_mcmc.txt
11
+
12
+ python3 # format of RTC file (tab separated)
13
+ symbiont_1,symbiont_2 host_1,host_2:0.9876
14
+ recipient1,recipient2 donor1,donor2:0.6789
15
+
16
+ # The above two lines can be interpreted as follows:
17
+ The probability of the last common ancestor (LCA) of symbiont_1 and symbiont_2 being
18
+ younger than that of host_1 and host_2 is 0.9876. Similarly, for a gene transfer event,
19
+ the LCA of the two recipients is younger than that of the two donors, the value 0.6789
20
+ is the PROBABILITY of the occurrence of the transfer event. Please do NOT use this module
21
+ if you are unsure about the interpretation of the value you provided.
22
+
23
+ # Note
24
+ This is a Python wrapper to perform the probabilistic RTC dating proposed by Dr. Sishuo Wang.
25
+ If you used it in your dating analysis, please cite:
26
+ https://doi.org/10.1101/2023.06.18.545440 or https://github.com/evolbeginner/rrtc.
27
+
28
+ ===============================================================================================
29
+ '''
30
+
31
+
32
def pRTC(args):
    """Run do_rrtc.rb on MCMCTree results and save its standard output.

    Args:
        args: dict with five keys:
            'i':    the file "out" generated by MCMCTree.
            'm':    the file "mcmc.txt" generated by MCMCTree.
            'r':    the folder that contains all RTCs.
            'ruby': ruby executable to use.
            'o':    file that receives the script's standard output.

    do_rrtc.rb is expected in the directory of this file. A failure to start
    ruby, or a non-zero exit code, is reported on standard output.
    """
    import subprocess  # local import so the module's import block stays untouched

    in_file = args['i']
    mcmc_txt = args['m']
    rrtc_dir = args['r']
    ruby_exe = args['ruby']
    op_txt = args['o']

    current_file_path = os.path.dirname(os.path.realpath(__file__))
    do_rrtc_rb = os.path.join(current_file_path, 'do_rrtc.rb')

    # an argument list (no shell) keeps paths with spaces intact; the output
    # redirection is done by handing the opened file to the child process
    do_rrtc_cmd = [ruby_exe, do_rrtc_rb, '--mcmctxt', mcmc_txt, '-i', in_file, '--rrtc', rrtc_dir]
    print('Running: ' + ' '.join(do_rrtc_cmd) + ' > ' + op_txt)
    with open(op_txt, 'w') as op_txt_handle:
        try:
            do_rrtc_exit_code = subprocess.call(do_rrtc_cmd, stdout=op_txt_handle)
        except OSError as do_rrtc_error:
            print('Failed to run %s: %s' % (ruby_exe, do_rrtc_error))
            return

    if do_rrtc_exit_code != 0:
        print('%s exited with code %s' % (do_rrtc_rb, do_rrtc_exit_code))
45
+
46
+
47
if __name__ == '__main__':

    # Script entry point: every option except -ruby is mandatory; the parsed
    # namespace is handed to pRTC() as a plain dict keyed by option name.
    pRTC_parser = argparse.ArgumentParser(usage=pRTC_usage)
    for option_flag, option_settings in (
            ('-i', dict(required=True, help='the file "out" generated by MCMCTree')),
            ('-m', dict(required=True, help='the file "mcmc.txt" generated by MCMCTree')),
            ('-r', dict(required=True, help='the folder that contains all RTCs')),
            ('-o', dict(required=True, help='output txt file')),
            ('-ruby', dict(required=False, default='ruby', help='path to ruby executable file, default: ruby')),
    ):
        pRTC_parser.add_argument(option_flag, **option_settings)
    args = vars(pRTC_parser.parse_args())
    pRTC(args)