treesak 1.51.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of treesak might be problematic. Click here for more details.

Files changed (125) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +130 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/CompareMCMC.py +138 -0
  19. TreeSAK/ConcateMSA.py +111 -0
  20. TreeSAK/ConvertMSA.py +135 -0
  21. TreeSAK/Dir.rb +82 -0
  22. TreeSAK/ExtractMarkerSeq.py +263 -0
  23. TreeSAK/FastRoot.py +1175 -0
  24. TreeSAK/FastRoot_backup.py +1122 -0
  25. TreeSAK/FigTree.py +34 -0
  26. TreeSAK/GTDB_tree.py +76 -0
  27. TreeSAK/GeneTree.py +142 -0
  28. TreeSAK/KEGG_Luo17.py +807 -0
  29. TreeSAK/LcaToLeaves.py +66 -0
  30. TreeSAK/MarkerRef2Tree.py +616 -0
  31. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  32. TreeSAK/MarkerSeq2Tree.py +290 -0
  33. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  34. TreeSAK/ModifyTopo.py +116 -0
  35. TreeSAK/Newick_tree_plotter.py +79 -0
  36. TreeSAK/OMA.py +170 -0
  37. TreeSAK/OMA2.py +212 -0
  38. TreeSAK/OneLineAln.py +50 -0
  39. TreeSAK/PB.py +155 -0
  40. TreeSAK/PMSF.py +106 -0
  41. TreeSAK/PhyloBiAssoc.R +84 -0
  42. TreeSAK/PhyloBiAssoc.py +167 -0
  43. TreeSAK/PlotMCMC.py +41 -0
  44. TreeSAK/PlotMcmcNode.py +152 -0
  45. TreeSAK/PlotMcmcNode_old.py +252 -0
  46. TreeSAK/RootTree.py +101 -0
  47. TreeSAK/RootTreeGTDB214.py +288 -0
  48. TreeSAK/RootTreeGTDB220.py +300 -0
  49. TreeSAK/RootTreeGTDB226.py +300 -0
  50. TreeSAK/SequentialDating.py +16 -0
  51. TreeSAK/SingleAleHGT.py +157 -0
  52. TreeSAK/SingleLinePhy.py +50 -0
  53. TreeSAK/SliceMSA.py +142 -0
  54. TreeSAK/SplitScore.py +19 -0
  55. TreeSAK/SplitScore1.py +178 -0
  56. TreeSAK/SplitScore1OMA.py +148 -0
  57. TreeSAK/SplitScore2.py +597 -0
  58. TreeSAK/TaxaCountStats.R +256 -0
  59. TreeSAK/TaxonTree.py +47 -0
  60. TreeSAK/TreeSAK_config.py +32 -0
  61. TreeSAK/VERSION +158 -0
  62. TreeSAK/VisHPD95.R +45 -0
  63. TreeSAK/VisHPD95.py +200 -0
  64. TreeSAK/__init__.py +0 -0
  65. TreeSAK/ale_parser.py +74 -0
  66. TreeSAK/ale_splitter.py +63 -0
  67. TreeSAK/alignment_pruner.pl +1471 -0
  68. TreeSAK/assessOG.py +45 -0
  69. TreeSAK/catfasta2phy.py +140 -0
  70. TreeSAK/cogTree.py +185 -0
  71. TreeSAK/compare_trees.R +30 -0
  72. TreeSAK/compare_trees.py +255 -0
  73. TreeSAK/dating.py +264 -0
  74. TreeSAK/dating_ss.py +361 -0
  75. TreeSAK/deltall.py +82 -0
  76. TreeSAK/do_rrtc.rb +464 -0
  77. TreeSAK/fa2phy.py +42 -0
  78. TreeSAK/format_leaf_name.py +70 -0
  79. TreeSAK/gap_stats.py +38 -0
  80. TreeSAK/get_SCG_tree.py +742 -0
  81. TreeSAK/get_arCOG_seq.py +97 -0
  82. TreeSAK/global_functions.py +222 -0
  83. TreeSAK/gnm_leaves.py +43 -0
  84. TreeSAK/iTOL.py +791 -0
  85. TreeSAK/iTOL_gene_tree.py +80 -0
  86. TreeSAK/itol_msa_stats.py +56 -0
  87. TreeSAK/keep_highest_rrtc.py +37 -0
  88. TreeSAK/koTree.py +194 -0
  89. TreeSAK/label_tree.R +75 -0
  90. TreeSAK/label_tree.py +121 -0
  91. TreeSAK/mad.py +708 -0
  92. TreeSAK/mcmc2tree.py +58 -0
  93. TreeSAK/mcmcTC copy.py +92 -0
  94. TreeSAK/mcmcTC.py +104 -0
  95. TreeSAK/mcmctree_vs_reltime.R +44 -0
  96. TreeSAK/mcmctree_vs_reltime.py +252 -0
  97. TreeSAK/merge_pdf.py +32 -0
  98. TreeSAK/pRTC.py +56 -0
  99. TreeSAK/parse_mcmctree.py +198 -0
  100. TreeSAK/parse_reltime.py +141 -0
  101. TreeSAK/phy2fa.py +37 -0
  102. TreeSAK/plot_distruibution_th.py +165 -0
  103. TreeSAK/prep_mcmctree_ctl.py +92 -0
  104. TreeSAK/print_leaves.py +32 -0
  105. TreeSAK/pruneMSA.py +63 -0
  106. TreeSAK/recode.py +73 -0
  107. TreeSAK/remove_bias.R +112 -0
  108. TreeSAK/rename_leaves.py +77 -0
  109. TreeSAK/replace_clade.py +55 -0
  110. TreeSAK/root_with_out_group.py +84 -0
  111. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  112. TreeSAK/subsample_drep_gnms.py +74 -0
  113. TreeSAK/subset.py +69 -0
  114. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  115. TreeSAK/supertree.py +330 -0
  116. TreeSAK/tmp_1.py +19 -0
  117. TreeSAK/tmp_2.py +19 -0
  118. TreeSAK/tmp_3.py +120 -0
  119. TreeSAK/weighted_rand.rb +23 -0
  120. treesak-1.51.2.data/scripts/TreeSAK +950 -0
  121. treesak-1.51.2.dist-info/LICENSE +674 -0
  122. treesak-1.51.2.dist-info/METADATA +27 -0
  123. treesak-1.51.2.dist-info/RECORD +125 -0
  124. treesak-1.51.2.dist-info/WHEEL +5 -0
  125. treesak-1.51.2.dist-info/top_level.txt +1 -0
TreeSAK/VisHPD95.py ADDED
@@ -0,0 +1,200 @@
1
+ import os
2
+ import glob
3
+ import argparse
4
+ from ete3 import Tree
5
+ from TreeSAK.TreeSAK_config import config_dict
6
+
7
+
8
def mcmctree_out_to_tree_str(mamctree_out):
    """Return the renamed FigTree species tree stored in an MCMCTree output file.

    The tree is read from the line directly below the
    'Species tree for FigTree. ...' marker line; if the marker occurs more
    than once, the tree below the last marker is used. Leaf names lose their
    first underscore-delimited field, internal node names get the prefix
    't_n'.

    :param mamctree_out: path to the MCMCTree output text file.
    :return: Newick string of the renamed tree as written by ete3 with
             ``format=8``.
    """
    marker = 'Species tree for FigTree. Branch lengths = posterior mean times; 95% CIs = labels'

    # get tree string from mamctree_out
    tree_str = ''
    take_next_line = False
    with open(mamctree_out) as mcmctree_handle:  # close the handle deterministically
        for each_line in mcmctree_handle:
            if marker in each_line:
                take_next_line = True
            elif take_next_line:
                tree_str = each_line.strip()
                take_next_line = False

    tree_str_no_space = tree_str.replace(' ', '')

    # rename tree nodes
    t = Tree(tree_str_no_space, format=1)
    for each_node in t.traverse():
        if each_node.is_leaf():
            # a leaf name without any underscore becomes an empty string here
            each_node.name = '_'.join(each_node.name.split('_')[1:])
        else:
            each_node.name = 't_n%s' % each_node.name

    return t.write(format=8)
35
+
36
+
37
def get_internal_node_to_plot(node_txt, mo_file):
    """Work out which tree nodes should be plotted and how to label them.

    :param node_txt: either the path of a tab-separated file or, when no such
                     file exists, a comma-separated string of node names.
                     In the file, column 1 is a node name or a
                     comma-separated list of leaves whose common ancestor
                     (in the tree taken from ``mo_file``) is plotted; an
                     optional column 2 is the name to show in the plot.
    :param mo_file: path of an MCMCTree output file; only read if it exists.
    :return: tuple ``(node_set, node_rename_dict, tree_str)``. ``node_set``
             is a set when ``node_txt`` is a file and a list (in the given
             order) when it is a comma-separated string. ``tree_str`` is ''
             when ``mo_file`` does not exist.
    """
    tree_str = ''
    if os.path.isfile(mo_file):
        tree_str = mcmctree_out_to_tree_str(mo_file)

    node_rename_dict = dict()

    # node_txt is not a file: treat it as a comma-separated list of node names
    if not os.path.isfile(node_txt):
        return node_txt.split(','), node_rename_dict, tree_str

    node_set = set()
    parsed_tree = None  # built on first use, then shared by every LCA lookup
    with open(node_txt) as node_txt_handle:
        for each in node_txt_handle:
            each_split = each.strip().split('\t')
            node_str = each_split[0]

            # get internal_node_to_plot
            if ',' not in node_str:
                internal_node_to_plot = node_str
            else:
                if tree_str == '':
                    print('MCMCTree out file not found, program exited!')
                    exit()
                if parsed_tree is None:
                    parsed_tree = Tree(tree_str, format=1)
                current_lca = parsed_tree.get_common_ancestor(node_str.split(','))
                internal_node_to_plot = current_lca.name

            # add internal_node_to_plot to node_set (blank lines yield '')
            if internal_node_to_plot != '':
                node_set.add(internal_node_to_plot)

            # read in name to show in plot
            if len(each_split) == 2 and each_split[1] != '':
                node_rename_dict[internal_node_to_plot] = each_split[1]

    return node_set, node_rename_dict, tree_str
75
+
76
+
77
def read_in_posterior_mean(mcmctree_out):
    """Read posterior means and 95% HPD bounds from an MCMCTree output file.

    Only lines after the 'Posterior mean (95% Equal-tail CI) (95% HPD CI)
    HPD-CI-width' header are considered. A line is kept when it yields
    exactly nine fields after splitting on single spaces, dropping empty
    and lone '(' tokens, and removing '(', ')' and ',' characters.

    :param mcmctree_out: path to the MCMCTree output text file.
    :return: dict mapping the first field (node id) to the list
             ``[mean, hpd95_low, hpd95_high]`` (fields 1, 4 and 5), all kept
             as strings.
    """
    header = 'Posterior mean (95% Equal-tail CI) (95% HPD CI) HPD-CI-width'

    node_to_mean_hpd95_dict = dict()
    header_seen = False
    with open(mcmctree_out) as mcmctree_handle:  # close the handle deterministically
        for each_line in mcmctree_handle:
            if header in each_line:
                # the header line itself never carries values
                header_seen = True
                continue
            if not header_seen:
                continue

            fields = [
                token.replace('(', '').replace(')', '').replace(',', '')
                for token in each_line.strip().split(' ')
                if token not in ('', '(')
            ]
            if len(fields) == 9:
                node_to_mean_hpd95_dict[fields[0]] = [fields[1], fields[4], fields[5]]

    return node_to_mean_hpd95_dict
104
+
105
+
106
+ VisHPD95_usage = '''
107
+ ============================ VisHPD95 example command ============================
108
+
109
+ TreeSAK VisHPD95 -i mcmc_out -o HPD95.pdf -n nodes.txt -label label.txt
110
+ TreeSAK VisHPD95 -i mcmc_out -o HPD95.pdf -n nodes.txt -label label.txt -x 9 -y 6
111
+
112
+ # Example data
113
+ https://github.com/songweizhi/TreeSAK/tree/master/DemoData/VisHPD95
114
+
115
+ ==================================================================================
116
+ '''
117
+
118
+
119
def VisHPD95(args):
    """Collect posterior means and 95% HPD intervals and plot them with VisHPD95.R.

    For every MCMCTree output file the requested nodes are looked up and one
    row per node is written to '<plot_out>.txt'
    (columns: Test, Shape, Var, Mean, Low, High). That table is then handed
    to the VisHPD95.R script that sits next to this module.

    :param args: dict with the keys
                 'i'     MCMCTree output file, or a folder holding '*_out.txt' files,
                 'n'     node file or comma-separated node names,
                 'label' optional 3-column tab-separated file
                         (file name, value for the Test column, value for the
                         Shape column) or None,
                 'o'     output plot path,
                 'x'     plot width,
                 'y'     plot height.
    :return: None
    """
    # local import keeps this block self-contained (file-level imports untouched)
    import subprocess

    mcmc_in = args['i']
    node_txt = args['n']
    label_txt = args['label']
    plot_out = args['o']
    plot_width = args['x']
    plot_height = args['y']

    current_file_path = os.path.dirname(os.path.realpath(__file__))
    VisHPD95_R = os.path.join(current_file_path, 'VisHPD95.R')

    dm_out = '%s.txt' % plot_out

    # check MCMCTree output file/dir
    if os.path.isfile(mcmc_in):
        mcmc_out_file_list = [mcmc_in]
    else:
        # sorted: glob order is arbitrary, keep the table rows reproducible
        mcmc_out_file_list = sorted(glob.glob(os.path.join(mcmc_in, '*_out.txt')))

    if len(mcmc_out_file_list) == 0:
        print('MCMCTree out file not found, program exited!')
        exit()

    # read in y-axis label file
    color_dict = dict()
    shape_dict = dict()
    if label_txt is not None:
        with open(label_txt) as label_handle:
            for each_sample in label_handle:
                if each_sample.strip() == '':
                    continue  # tolerate blank (e.g. trailing) lines
                each_sample_split = each_sample.strip().split('\t')
                if len(each_sample_split) == 3:
                    color_dict[each_sample_split[0]] = each_sample_split[1]
                    shape_dict[each_sample_split[0]] = each_sample_split[2]
                else:
                    print('Format error: %s' % label_txt)
                    exit()

    with open(dm_out, 'w') as dm_out_handle:
        dm_out_handle.write('Test\tShape\tVar\tMean\tLow\tHigh\n')
        for mcmc_out_file in mcmc_out_file_list:
            mcmc_out_file_no_path = os.path.basename(mcmc_out_file)

            color_col_to_write = color_dict.get(mcmc_out_file_no_path, mcmc_out_file_no_path)
            shape_col_to_write = shape_dict.get(mcmc_out_file_no_path, mcmc_out_file_no_path)
            node_set, node_rename_dict, tree_str = get_internal_node_to_plot(node_txt, mcmc_out_file)
            node_to_mean_95_hpd_dict = read_in_posterior_mean(mcmc_out_file)

            for each_node in node_set:
                mean_95_hpd_list = node_to_mean_95_hpd_dict.get(each_node)
                if mean_95_hpd_list is None:
                    # joining None would raise TypeError; report and skip instead
                    print('Node %s not found in %s, skipped!' % (each_node, mcmc_out_file))
                    continue
                node_name_to_write = node_rename_dict.get(each_node, each_node)
                dm_out_handle.write('%s\t%s\t%s\t%s\n' % (color_col_to_write, shape_col_to_write, node_name_to_write, '\t'.join(mean_95_hpd_list)))

    # argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed to Rscript unchanged
    plot_cmd = ['Rscript', VisHPD95_R, '-i', dm_out, '-x', str(plot_width), '-y', str(plot_height), '-o', plot_out]
    try:
        plot_status = subprocess.run(plot_cmd).returncode
    except FileNotFoundError:
        print('Rscript not found, plot not generated!')
        return

    if plot_status == 0:
        print('Plot exported to: %s' % plot_out)
    else:
        print('Rscript exited with status %s, plot not generated!' % plot_status)
181
+
182
+
183
if __name__ == '__main__':

    # command-line options of the VisHPD95 module, in the order shown by -h
    option_specs = [
        ('-i', dict(required=True, help='mcmc.txt file or folder')),
        ('-n', dict(required=True, help='Nodes to plot')),
        ('-label', dict(required=False, default=None, help='labels on y axis')),
        ('-x', dict(required=False, default=8, type=int, help='plot width, default: 8')),
        ('-y', dict(required=False, default=5, type=int, help='plot height, default: 5')),
        ('-o', dict(required=True, help='Output plot')),
    ]

    VisHPD95_parser = argparse.ArgumentParser()
    for option_flag, option_kwargs in option_specs:
        VisHPD95_parser.add_argument(option_flag, **option_kwargs)

    # VisHPD95() expects a plain dict keyed by option name
    args = vars(VisHPD95_parser.parse_args())
    VisHPD95(args)
194
+
195
+ '''
196
+
197
+ cd /Users/songweizhi/Desktop/777
198
+ python3 ~/PycharmProjects/TreeSAK/TreeSAK/VisHPD95.py -i M1_mcmc_txt -o M1_HPD95.pdf -n nodes_five.txt -label y_label_out.txt
199
+
200
+ '''
TreeSAK/__init__.py ADDED
File without changes
TreeSAK/ale_parser.py ADDED
@@ -0,0 +1,74 @@
1
+ import sys
2
+ import os
3
+ import argparse
4
+
5
+
6
def ale_parser(rec_folder, options):
    """Summarise the ALE reconciliation files ('*uml_rec') found in a folder.

    All output files are written to the current working directory.

    :param rec_folder: folder holding the uml_rec files.
    :param options: sequence of four flags, in this order:
                    [0] write the species tree to 'SpeciesTreeRef.newick',
                    [1] write one row per family to 'TableInfo.tsv',
                    [2] write the per-branch event tables to 'TableEvents.tsv',
                    [3] write reconciled gene trees to 'GeneTrees.nwk'.
    :return: None
    """
    # sorted: os.listdir order is arbitrary, which made the output order (and
    # the choice of the "last" file used below) differ between runs
    rec_files = sorted(x for x in os.listdir(rec_folder) if x.endswith("uml_rec"))
    if not rec_files:
        print("No uml_rec files found in: %s" % rec_folder)

    table_info = list()
    table_events = list()

    # values taken from the last file processed; pre-set so that an empty
    # folder no longer ends in a NameError
    stree = None
    reconciled_trees = []
    table = []

    for rec_file in rec_files:
        with open(os.path.join(rec_folder, rec_file)) as f:
            lines = f.readlines()

        fam = rec_file.replace(".ale.uml_rec", "")

        stree = lines[2].strip()
        ll = lines[6].strip().split()[-1]
        dp, tp, lp = lines[8].strip().split("\t")[1:]
        n_reconciled_trees = int(lines[9].strip().split()[0])
        reconciled_trees = lines[11:n_reconciled_trees + 11]
        # drop the line break here; rows get an explicit one when written
        de, te, le, se = lines[11 + n_reconciled_trees + 1].rstrip("\n").split("\t")[1:]
        table = lines[11 + n_reconciled_trees + 3:]

        table_info.append((fam, ll, dp, tp, lp, de, te, le, se))
        table_events.append((fam, table))

    if options[0] and stree is not None:
        with open("SpeciesTreeRef.newick", "w") as f:
            f.write(stree.split("\t")[-1])

    if options[1]:
        with open("TableInfo.tsv", "w") as f:
            f.write("\t".join(["Family", "LL", "Dp", "Tp", "Lp", "De", "Te", "Le", "Se"]) + "\n")
            for info in table_info:
                f.write("\t".join(info) + "\n")

    if options[2] and table:
        with open("TableEvents.tsv", "w") as f:
            # column header is taken from the last file's table
            f.write("Family\tBranchType\t" + table[0].replace("# of", "Branch"))
            for fam, events in table_events:
                for b in events[1:]:
                    f.write(fam + "\t" + b)

    if options[3] and rec_files:
        # NOTE(review): only the trees of the last processed family are
        # written, as in the original code -- confirm this is intended.
        with open("GeneTrees.nwk", "w") as f:
            f.writelines(reconciled_trees)
55
+
56
+
57
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", help="Folder with uml_rec files")

    # the four on/off switches share everything but their flag and help text
    switch_help = (
        ("-s", "Prints species tree to a different file"),
        ("-f", "Prints info about the family (LogLikelihood, probabilities and total number of events, to a different file"),
        ("-t", "Prints reconciliation table to a different file"),
        ("-g", "Prints gene trees to a different file"),
    )
    for switch, help_text in switch_help:
        parser.add_argument(switch, help=help_text, action='store_true', default=False)

    args = parser.parse_args()

    # without an input folder there is nothing to do: show a hint and stop
    if args.i is None:
        print("use python ale_parser.py -h to see options")
        print("you can run this script just with python ale_parser -i FolderWithRecFiles -sft")
        exit(0)

    ale_parser(args.i, [args.s, args.f, args.t, args.g])
74
+
@@ -0,0 +1,63 @@
1
+ import sys
2
+ import os
3
+ import argparse
4
+
5
+
6
def _ale_splitter_out_path(rec_file, suffix):
    """Return the output path for rec_file with 'uml_rec' swapped for suffix.

    Only the file name is rewritten, never the directory part. A file name
    without 'uml_rec' gets '.<suffix>' appended so that the input file is
    never overwritten.
    """
    folder, base_name = os.path.split(rec_file)
    if "uml_rec" in base_name:
        new_name = base_name.replace("uml_rec", suffix)
    else:
        new_name = "%s.%s" % (base_name, suffix)
    return os.path.join(folder, new_name)


def ale_splitter(rec_file, options):
    """Split one ALE uml_rec file into separate files placed next to it.

    :param rec_file: path to the uml_rec file.
    :param options: sequence of four flags, in this order:
                    [0] species tree         -> '*stree',
                    [1] likelihood, rates and event totals -> '*info',
                    [2] reconciled gene trees -> '*recs',
                    [3] reconciliation table  -> '*rec_table'.
    :return: None
    """
    with open(rec_file) as f:
        lines = f.readlines()

    stree = lines[2].strip()
    ll = lines[6].strip().split()[-1]
    rates = lines[8].strip().split("\t")[1:]

    n_reconciled_trees = int(lines[9].strip().split()[0])
    reconciled_trees = lines[11:n_reconciled_trees + 11]
    # drop the line break so the last total is not followed by a blank line
    n_of_events = lines[11 + n_reconciled_trees + 1].rstrip("\n").split("\t")[1:]
    table = lines[11 + n_reconciled_trees + 3:]

    if options[0]:
        with open(_ale_splitter_out_path(rec_file, "stree"), "w") as f:
            f.write(stree.split("\t")[-1])

    if options[1]:
        info_rows = [
            ("LL:", ll),
            ("Dp:", rates[0]),
            ("Tp:", rates[1]),
            ("Lp:", rates[2]),
            ("De:", n_of_events[0]),
            ("Te:", n_of_events[1]),
            ("Le:", n_of_events[2]),
            ("Se:", n_of_events[3]),
        ]
        with open(_ale_splitter_out_path(rec_file, "info"), "w") as f:
            for row_label, row_value in info_rows:
                f.write(row_label + "\t" + row_value + "\n")

    if options[2]:
        with open(_ale_splitter_out_path(rec_file, "recs"), "w") as f:
            f.writelines(reconciled_trees)

    if options[3]:
        with open(_ale_splitter_out_path(rec_file, "rec_table"), "w") as f:
            f.writelines(table)
42
+
43
+
44
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("-i", help="Uml_rec file")
    parser.add_argument("-s", help="Prints species tree to a different file", action='store_true', default=False)
    parser.add_argument("-f",
                        help="Prints info about the family (LogLikelihood, probabilities and total number of events, to a different file",
                        action='store_true', default=False)
    parser.add_argument("-t", help="Prints reconciliation table to a different file", action='store_true',
                        default=False)
    parser.add_argument("-r", help="Prints reconciled gene trees to a different file", action='store_true',
                        default=False)
    args = parser.parse_args()

    # without an input file there is nothing to do: show a hint and stop
    if args.i is None:
        print("use python ale_splitter.py -h to see options")
        print("you can run this script just with python ale_splitter -i XXXX.uml_rec")
        exit(0)

    # ale_splitter() reads its flags as
    # [species tree, family info, reconciled trees, reconciliation table],
    # so -r has to come before -t; passing them the other way round made -t
    # write the gene trees and -r write the table.
    ale_splitter(args.i, [args.s, args.f, args.r, args.t])