treesak 1.53.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +113 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/C60SR4.nex +127 -0
  19. TreeSAK/CompareMCMC.py +138 -0
  20. TreeSAK/ConcateMSA.py +111 -0
  21. TreeSAK/ConvertMSA.py +135 -0
  22. TreeSAK/Dir.rb +82 -0
  23. TreeSAK/ExtractMarkerSeq.py +263 -0
  24. TreeSAK/FastRoot.py +1175 -0
  25. TreeSAK/FastRoot_backup.py +1122 -0
  26. TreeSAK/FigTree.py +34 -0
  27. TreeSAK/GTDB_tree.py +76 -0
  28. TreeSAK/GeneTree.py +142 -0
  29. TreeSAK/KEGG_Luo17.py +807 -0
  30. TreeSAK/LcaToLeaves.py +66 -0
  31. TreeSAK/MarkerRef2Tree.py +616 -0
  32. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  33. TreeSAK/MarkerSeq2Tree.py +299 -0
  34. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  35. TreeSAK/ModifyTopo.py +116 -0
  36. TreeSAK/Newick_tree_plotter.py +79 -0
  37. TreeSAK/OMA.py +170 -0
  38. TreeSAK/OMA2.py +212 -0
  39. TreeSAK/OneLineAln.py +50 -0
  40. TreeSAK/PB.py +155 -0
  41. TreeSAK/PMSF.py +115 -0
  42. TreeSAK/PhyloBiAssoc.R +84 -0
  43. TreeSAK/PhyloBiAssoc.py +167 -0
  44. TreeSAK/PlotMCMC.py +41 -0
  45. TreeSAK/PlotMcmcNode.py +152 -0
  46. TreeSAK/PlotMcmcNode_old.py +252 -0
  47. TreeSAK/RootTree.py +101 -0
  48. TreeSAK/RootTreeGTDB.py +371 -0
  49. TreeSAK/RootTreeGTDB214.py +288 -0
  50. TreeSAK/RootTreeGTDB220.py +300 -0
  51. TreeSAK/SequentialDating.py +16 -0
  52. TreeSAK/SingleAleHGT.py +157 -0
  53. TreeSAK/SingleLinePhy.py +50 -0
  54. TreeSAK/SliceMSA.py +142 -0
  55. TreeSAK/SplitScore.py +21 -0
  56. TreeSAK/SplitScore1.py +177 -0
  57. TreeSAK/SplitScore1OMA.py +148 -0
  58. TreeSAK/SplitScore2.py +608 -0
  59. TreeSAK/TaxaCountStats.R +256 -0
  60. TreeSAK/TaxonTree.py +47 -0
  61. TreeSAK/TreeSAK_config.py +32 -0
  62. TreeSAK/VERSION +164 -0
  63. TreeSAK/VisHPD95.R +45 -0
  64. TreeSAK/VisHPD95.py +200 -0
  65. TreeSAK/__init__.py +0 -0
  66. TreeSAK/ale_parser.py +74 -0
  67. TreeSAK/ale_splitter.py +63 -0
  68. TreeSAK/alignment_pruner.pl +1471 -0
  69. TreeSAK/assessOG.py +45 -0
  70. TreeSAK/batch_itol.py +171 -0
  71. TreeSAK/catfasta2phy.py +140 -0
  72. TreeSAK/cogTree.py +185 -0
  73. TreeSAK/compare_trees.R +30 -0
  74. TreeSAK/compare_trees.py +255 -0
  75. TreeSAK/dating.py +264 -0
  76. TreeSAK/dating_ss.py +361 -0
  77. TreeSAK/deltall.py +82 -0
  78. TreeSAK/do_rrtc.rb +464 -0
  79. TreeSAK/fa2phy.py +42 -0
  80. TreeSAK/filter_rename_ar53.py +118 -0
  81. TreeSAK/format_leaf_name.py +70 -0
  82. TreeSAK/gap_stats.py +38 -0
  83. TreeSAK/get_SCG_tree.py +742 -0
  84. TreeSAK/get_arCOG_seq.py +97 -0
  85. TreeSAK/global_functions.py +222 -0
  86. TreeSAK/gnm_leaves.py +43 -0
  87. TreeSAK/iTOL.py +791 -0
  88. TreeSAK/iTOL_gene_tree.py +80 -0
  89. TreeSAK/itol_msa_stats.py +56 -0
  90. TreeSAK/keep_highest_rrtc.py +37 -0
  91. TreeSAK/koTree.py +194 -0
  92. TreeSAK/label_gene_tree_by_gnm.py +34 -0
  93. TreeSAK/label_tree.R +75 -0
  94. TreeSAK/label_tree.py +121 -0
  95. TreeSAK/mad.py +708 -0
  96. TreeSAK/mcmc2tree.py +58 -0
  97. TreeSAK/mcmcTC copy.py +92 -0
  98. TreeSAK/mcmcTC.py +104 -0
  99. TreeSAK/mcmctree_vs_reltime.R +44 -0
  100. TreeSAK/mcmctree_vs_reltime.py +252 -0
  101. TreeSAK/merge_pdf.py +32 -0
  102. TreeSAK/pRTC.py +56 -0
  103. TreeSAK/parse_mcmctree.py +198 -0
  104. TreeSAK/parse_reltime.py +141 -0
  105. TreeSAK/phy2fa.py +37 -0
  106. TreeSAK/plot_distruibution_th.py +165 -0
  107. TreeSAK/prep_mcmctree_ctl.py +92 -0
  108. TreeSAK/print_leaves.py +32 -0
  109. TreeSAK/pruneMSA.py +63 -0
  110. TreeSAK/recode.py +73 -0
  111. TreeSAK/remove_bias.R +112 -0
  112. TreeSAK/rename_leaves.py +78 -0
  113. TreeSAK/replace_clade.py +55 -0
  114. TreeSAK/root_with_out_group.py +84 -0
  115. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  116. TreeSAK/subsample_drep_gnms.py +74 -0
  117. TreeSAK/subset.py +69 -0
  118. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  119. TreeSAK/supertree.py +330 -0
  120. TreeSAK/tmp_1.py +19 -0
  121. TreeSAK/tmp_2.py +19 -0
  122. TreeSAK/tmp_3.py +120 -0
  123. TreeSAK/tmp_4.py +43 -0
  124. TreeSAK/tmp_5.py +12 -0
  125. TreeSAK/weighted_rand.rb +23 -0
  126. treesak-1.53.3.data/scripts/TreeSAK +955 -0
  127. treesak-1.53.3.dist-info/LICENSE +674 -0
  128. treesak-1.53.3.dist-info/METADATA +27 -0
  129. treesak-1.53.3.dist-info/RECORD +131 -0
  130. treesak-1.53.3.dist-info/WHEEL +5 -0
  131. treesak-1.53.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,252 @@
1
+ import os
2
+ import glob
3
+ import argparse
4
+ import pandas as pd
5
+ from ete3 import Tree
6
+ import plotly.express as px
7
+
8
+
9
+ PlotMcmcNode_usage = '''
10
+ ============================ PlotMcmcNode example commands ============================
11
+
12
+ TreeSAK PlotMcmcNode -i McmcTree_op_files -n n179 -o Clock2_n179.pdf
13
+ TreeSAK PlotMcmcNode -i McmcTree_op_files -n n161,n186 -o Clock3_n161_n186.pdf
14
+ TreeSAK PlotMcmcNode -i McmcTree_op_files -n nodes.txt -o multi_runs_multi_nodes.pdf
15
+
16
+ # File name of the mcmc.txt and the corresponding mcmc out file need to follow
17
+ # the rule as specified below:
18
+ [setting_1]_mcmc.txt
19
+ [setting_1]_out.txt
20
+ [setting_2]_mcmc.txt
21
+ [setting_2]_out.txt
22
+
23
+ # file format (-n, tab separated)
24
+ # leave the 2nd column blank for nodes without renaming
25
+ setting_1 node1 Bacteria
26
+ setting_2 node2
27
+ setting_3 node3,node9 Archaea
28
+
29
+ # Y-axis label file format (-l, tab separated)
30
+ PA_75_DeltaLL_50_clock3_mcmc.txt DeltaLL_50
31
+ PA_75_DeltaLL_75_clock3_mcmc.txt DeltaLL_75
32
+
33
+ =======================================================================================
34
+ '''
35
+
36
def sep_path_basename_ext(file_in):
    """Split a file path into its directory, base name and extension.

    Returns a ``(directory, basename, extension)`` tuple. The directory is
    reported as ``'.'`` when ``file_in`` has no directory component, and the
    extension keeps its leading dot (empty string when there is none).
    """
    directory, leaf_name = os.path.split(file_in)
    stem, suffix = os.path.splitext(leaf_name)
    return ('.' if directory == '' else directory), stem, suffix
47
+
48
+
49
def mcmctree_out_to_tree_str(mamctree_out):
    """Extract the FigTree species tree from an MCMCTree out file and rename its nodes.

    mamctree_out:   path to the MCMCTree out file.

    The tree is taken from the line that directly follows the
    'Species tree for FigTree...' header line. Spaces are removed from it, then
    every leaf is renamed by dropping its first underscore-separated field and
    every internal node is renamed to 't_n' followed by its original name.

    Returns the renamed tree as a string written with ete3 format 8.
    """
    figtree_header = 'Species tree for FigTree. Branch lengths = posterior mean times; 95% CIs = labels'

    # get tree string from mcmctree_out; the handle is closed as soon as the scan is done
    tree_str = ''
    tree_line = 0
    with open(mamctree_out) as mcmctree_out_handle:
        for current_line, each_line in enumerate(mcmctree_out_handle, start=1):
            if figtree_header in each_line:
                tree_line = current_line + 1
            if tree_line == current_line:
                tree_str = each_line.strip()

    tree_str_no_space = tree_str.replace(' ', '')

    # rename tree nodes
    t = Tree(tree_str_no_space, format=1)
    for each_node in t.traverse():
        if each_node.is_leaf():
            each_node.name = '_'.join(each_node.name.split('_')[1:])
        else:
            each_node.name = 't_n%s' % each_node.name

    return t.write(format=8)
76
+
77
+
78
def plot_distribution(df_txt, output_plot):
    """Draw horizontal violin plots of node values per setting and write them to a file.

    df_txt:         comma separated table with the columns Value, Node and Setting.
    output_plot:    path of the image file to write.
    """
    table = pd.read_table(df_txt, sep=',')
    n_settings = len(table['Setting'].unique())
    n_nodes = len(table['Node'].unique())

    # rows are ordered by setting (descending) before plotting
    table = table.sort_values(by='Setting', ascending=False)

    # 100 px per setting, but never less than 360 px
    fig_height = max(n_settings * 100, 360)

    fig = px.violin(table, x="Value", y="Setting", color="Node", points=False, orientation="h", width=900, height=fig_height)

    # a single node gets a filled violin, several nodes get a transparent fill
    if n_nodes == 1:
        trace_style = dict(side="positive", fillcolor='lightblue', width=1.6, opacity=0.75)
    else:
        trace_style = dict(side="positive", fillcolor='rgba(0,0,0,0)', width=1.6)
    fig.update_traces(**trace_style)

    fig.update_traces(showlegend=True)
    fig.layout.template = "simple_white"
    fig.write_image(output_plot)
105
+
106
+
107
def get_internal_node_to_plot(node_txt, mo_file):
    """Work out which nodes to plot and how to label them.

    node_txt:   either a tab separated node file or a comma separated string of node ids.
                In a node file, the first column is a node id, or a comma separated list
                of leaves whose common ancestor is the node to plot; an optional second
                column is the name to show in the plot for that node.
    mo_file:    the MCMCTree out file; it is only read if it exists and is only needed
                when a node is specified by its leaves.

    Returns (nodes, node_rename_dict, tree_str):
    nodes:              a set of node ids if node_txt is a file, otherwise the list
                        obtained by splitting node_txt on commas.
    node_rename_dict:   node id -> name to show in the plot (empty if node_txt is not a file).
    tree_str:           the renamed tree from mo_file, or '' if mo_file does not exist.

    Prints a message and calls exit() if a node is specified by its leaves but
    mo_file was not found.
    """
    tree_str = ''
    if os.path.isfile(mo_file):
        tree_str = mcmctree_out_to_tree_str(mo_file)

    node_rename_dict = dict()

    # nodes were given directly on the command line
    if os.path.isfile(node_txt) is not True:
        return node_txt.split(','), node_rename_dict, tree_str

    # get nodes to plot
    node_set = set()
    parsed_tree = None  # parsed at most once, and only if a line specifies a node by its leaves
    with open(node_txt) as node_txt_handle:
        for each in node_txt_handle:
            each_split = each.strip().split('\t')
            node_str = each_split[0]

            # get internal_node_to_plot
            if ',' not in node_str:
                internal_node_to_plot = node_str
            else:
                if tree_str == '':
                    print('*out.txt file not found, program exited!')
                    exit()
                if parsed_tree is None:
                    parsed_tree = Tree(tree_str, format=1)
                leaf_list = node_str.split(',')
                internal_node_to_plot = parsed_tree.get_common_ancestor(leaf_list).name

            # add internal_node_to_plot to node_set
            if internal_node_to_plot != '':
                node_set.add(internal_node_to_plot)

            # read in name to show in plot
            if len(each_split) == 2:
                if each_split[1] != '':
                    node_rename_dict[internal_node_to_plot] = each_split[1]

    return node_set, node_rename_dict, tree_str
145
+
146
+
147
def PlotMcmcNode(args):
    """Plot the distribution of selected nodes across one or more MCMCTree runs.

    args is a dict with the following keys:
    i:      an *_mcmc.txt file, or a folder holding *_mcmc.txt (and *_out.txt) files.
    n:      nodes to plot, either a node file or a comma separated string of node ids.
    o:      output plot.
    of:     the *out.txt file to use for all runs; if None, the out file of each run is
            located by replacing _mcmc.txt with _out.txt in the path of its mcmc file.
    l:      tab separated file (mcmc file name without path, y-axis label), or None.
    tmp:    the three intermediate files are removed only if this is False.

    The intermediate files ([basename of o]_tree.txt, _data.txt and _label.txt) are
    written to the current working directory.

    Prints a message and calls exit() if no mcmc file is found, if an expected out file
    is missing, if the label file is malformed or if none of the requested nodes is
    found in the mcmc files.
    """
    mcmc_in = args['i']
    node_txt = args['n']
    output_plot = args['o']
    specified_out_file = args['of']
    y_label_txt = args['l']
    keep_tmp_file = args['tmp']

    # check MCMCTree output file/dir
    if os.path.isfile(mcmc_in) is True:
        mcmc_file_list = [mcmc_in]
    else:
        mcmc_file_re = '%s/*_mcmc.txt' % (mcmc_in)
        mcmc_file_list = glob.glob(mcmc_file_re)

    if len(mcmc_file_list) == 0:
        print('*mcmc.txt file not found, program exited!')
        exit()

    # without -of, every mcmc file needs its own out file next to it
    if specified_out_file is None:
        missed_out_file_list = []
        for each_mcmc_file in mcmc_file_list:
            pwd_out_file = each_mcmc_file.replace('_mcmc.txt', '_out.txt')
            if os.path.isfile(pwd_out_file) is False:
                missed_out_file_list.append(pwd_out_file)
        if len(missed_out_file_list) > 0:
            print('The following *out.txt files are missing, program exited!')
            print('\n'.join(sorted(missed_out_file_list)))
            exit()

    # read in y-axis label file
    y_label_dict = dict()
    if y_label_txt is not None:
        with open(y_label_txt) as y_label_txt_handle:
            for each_sample in y_label_txt_handle:
                each_sample_split = each_sample.strip().split('\t')
                if len(each_sample_split) == 2:
                    y_label_dict[each_sample_split[0]] = each_sample_split[1]
                else:
                    print('Format error: %s' % y_label_txt)
                    exit()

    _, f_base, _ = sep_path_basename_ext(output_plot)
    found_matched_node = False
    op_tree_tmp = '%s_tree.txt' % f_base
    op_df_tmp = '%s_data.txt' % f_base
    op_label_tmp = '%s_label.txt' % f_base

    # the context manager closes all three files even if a run fails half way
    with open(op_label_tmp, 'w') as op_label_tmp_handle, \
            open(op_tree_tmp, 'w') as op_tree_tmp_handle, \
            open(op_df_tmp, 'w') as op_df_tmp_handle:
        op_df_tmp_handle.write('Value,Node,Setting\n')
        for mcmc_file in mcmc_file_list:

            mcmc_file_no_path = os.path.basename(mcmc_file)

            if specified_out_file is None:
                pwd_current_run_mcmc_out = mcmc_file.replace('_mcmc.txt', '_out.txt')
            else:
                pwd_current_run_mcmc_out = specified_out_file
            node_set, node_rename_dict, tree_str = get_internal_node_to_plot(node_txt, pwd_current_run_mcmc_out)
            op_tree_tmp_handle.write('%s\t%s\n' % (mcmc_file_no_path.replace('_mcmc.txt', ''), tree_str))

            # the mcmc file name is used as label unless the label file provides one
            label_to_write = y_label_dict.get(mcmc_file_no_path, mcmc_file_no_path)
            mcmc_df = pd.read_table(mcmc_file, index_col=0)
            for each_col in mcmc_df:
                if each_col in node_set:
                    node_name_to_write = node_rename_dict.get(each_col, each_col)
                    found_matched_node = True
                    for each_value in mcmc_df[each_col].values:
                        op_df_tmp_handle.write('%s,%s,%s\n' % (each_value, node_name_to_write, label_to_write))

                    op_label_tmp_handle.write('%s\t%s\t%s\n' % (label_to_write, each_col, node_name_to_write))

    if found_matched_node is False:
        print('Provided node(s) not found, program exited!')
        exit()

    # plot distribution
    plot_distribution(op_df_tmp, output_plot)

    # remove tmp files; os.remove avoids passing file names through a shell
    if keep_tmp_file is False:
        for each_tmp_file in (op_tree_tmp, op_df_tmp, op_label_tmp):
            os.remove(each_tmp_file)

    print('Plot exported to %s, done!' % output_plot)
240
+
241
+
242
if __name__ == '__main__':

    # command line options, registered in the order listed; the option names
    # (without the dash) are the keys PlotMcmcNode() reads from its args dict
    PlotMcmcNode_parser = argparse.ArgumentParser()
    for option_flag, option_settings in [
            ('-i', dict(required=True, help='folder holds the *mcmc.txt and *out.txt files')),
            ('-of', dict(required=False, default=None, help='the *out.txt file')),
            ('-n', dict(required=True, help='Nodes to plot')),
            ('-l', dict(required=False, default=None, help='labels on y axis')),
            ('-o', dict(required=True, help='Output plot')),
            ('-tmp', dict(required=False, action="store_true", help='keep tmp files')),
    ]:
        PlotMcmcNode_parser.add_argument(option_flag, **option_settings)
    args = vars(PlotMcmcNode_parser.parse_args())
    PlotMcmcNode(args)
TreeSAK/RootTree.py ADDED
@@ -0,0 +1,101 @@
1
+ import random
2
+ import dendropy
3
+ import argparse
4
+ from ete3 import Tree
5
+
6
+
7
+ RootTree_usage = '''
8
+ ====================== RootTree example commands ======================
9
+
10
+ TreeSAK RootTree -i input.tree -og outgroup_genomes.txt -o rooted.tree
11
+
12
+ =======================================================================
13
+ '''
14
+
15
+
16
def root_with_outgroup(input_tree, out_group_list, add_root_branch, tree_file_rooted):

    """
    Reroot the tree using the given outgroup and write the rerooted tree to a file.
    modified based on: https://github.com/Ecogenomics/GTDBTk/blob/master/gtdbtk/reroot_tree.py

    input_tree:         File containing Newick tree to rerooted.
    out_group_list:     Labels of taxa in outgroup (any container supporting "in").
    add_root_branch:    If True, wrap the tree in an extra pair of parentheses
                        carrying a branch of length 0.02 (the "root bar").
    tree_file_rooted:   Name of file for rerooted tree.

    Raises ValueError if none of the outgroup labels is found among the leaves
    of the tree, or if every leaf of the tree belongs to the outgroup.
    """

    tree = dendropy.Tree.get_from_path(input_tree, schema='newick', rooting='force-rooted', preserve_underscores=True)

    outgroup_in_tree = set()
    ingroup_leaves = set()
    for n in tree.leaf_node_iter():
        if n.taxon.label in out_group_list:
            outgroup_in_tree.add(n.taxon)
        else:
            ingroup_leaves.add(n)

    # fail early with a clear message instead of an obscure error further down
    if not outgroup_in_tree:
        raise ValueError('None of the provided outgroup taxa was found in %s' % input_tree)
    if not ingroup_leaves:
        raise ValueError('All leaves in %s belong to the outgroup, no ingroup leaf to root on' % input_tree)

    # Since finding the MRCA is a rooted tree operation, the tree is first rerooted on an ingroup taxa. This
    # ensures the MRCA of the outgroup can be identified so long as the outgroup is monophyletic. If the
    # outgroup is polyphyletic trying to root on it is ill-defined. To try and pick a "good" root for
    # polyphyletic outgroups, random ingroup taxa are selected until two of them give the same size
    # lineage. This will, likely, be the smallest bipartition possible for the given outgroup though
    # this is not guaranteed.

    ingroup_leaf_list = list(ingroup_leaves)  # built once, sampled from in every iteration
    mrca = tree.mrca(taxa=outgroup_in_tree)
    mrca_leaves = len(mrca.leaf_nodes())
    while True:
        rnd_ingroup = random.sample(ingroup_leaf_list, 1)[0]
        tree.reroot_at_edge(rnd_ingroup.edge, length1=0.5 * rnd_ingroup.edge_length, length2=0.5 * rnd_ingroup.edge_length)
        mrca = tree.mrca(taxa=outgroup_in_tree)
        if len(mrca.leaf_nodes()) == mrca_leaves:
            break
        mrca_leaves = len(mrca.leaf_nodes())

    # root on the branch leading to the outgroup MRCA, splitting its length in half
    if mrca.edge_length is not None:
        tree.reroot_at_edge(mrca.edge, length1=0.5 * mrca.edge_length, length2=0.5 * mrca.edge_length)

    tree_out_string = tree.as_string(schema='newick', suppress_rooting=True, unquoted_underscores=True)
    tree_out_string = tree_out_string.replace("'", "")

    # add the root bar
    if add_root_branch is True:
        tree_out_string = '(' + tree_out_string
        tree_out_string = tree_out_string.replace(');', '):0.02);')

    # write out tree string
    with open(tree_file_rooted, 'w') as tree_file_rooted_handle:
        tree_file_rooted_handle.write(tree_out_string)
70
+
71
+
72
def RootTree(args):
    """Root a tree with the outgroup leaves listed in a text file.

    args is a dict with the following keys:
    i:          input tree file.
    og:         text file with one outgroup leaf name per line; surrounding
                whitespace is stripped and blank lines are ignored.
    o:          output file for the rooted tree.
    add_root:   passed on to root_with_outgroup() as add_root_branch.
    """
    tree_file = args['i']
    out_group_txt = args['og']
    tree_file_rooted = args['o']
    add_root_branch = args['add_root']

    with open(out_group_txt) as out_group_txt_handle:
        out_group_set = {each_og.strip() for each_og in out_group_txt_handle}

    # a blank line in the file is not an outgroup label
    out_group_set.discard('')

    root_with_outgroup(tree_file, out_group_set, add_root_branch, tree_file_rooted)
90
+
91
+
92
if __name__ == '__main__':

    # command line options, registered in the order listed; the option names
    # (without the dash) become the keys of the args dict passed to RootTree()
    RootTree_parser = argparse.ArgumentParser()
    for option_flag, option_settings in [
            ('-i', dict(required=True, help='input tree')),
            ('-og', dict(required=True, help='out group leaves')),
            ('-o', dict(required=True, help='output tree')),
            ('-add_root', dict(required=False, action='store_true', help='add the root branch')),
            ('-fmt', dict(required=False, default=1, type=int, help='tree format, default: 1')),
    ]:
        RootTree_parser.add_argument(option_flag, **option_settings)
    args = vars(RootTree_parser.parse_args())
    RootTree(args)