treesak 1.51.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of treesak might be problematic. Click here for more details.

Files changed (125) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +130 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/CompareMCMC.py +138 -0
  19. TreeSAK/ConcateMSA.py +111 -0
  20. TreeSAK/ConvertMSA.py +135 -0
  21. TreeSAK/Dir.rb +82 -0
  22. TreeSAK/ExtractMarkerSeq.py +263 -0
  23. TreeSAK/FastRoot.py +1175 -0
  24. TreeSAK/FastRoot_backup.py +1122 -0
  25. TreeSAK/FigTree.py +34 -0
  26. TreeSAK/GTDB_tree.py +76 -0
  27. TreeSAK/GeneTree.py +142 -0
  28. TreeSAK/KEGG_Luo17.py +807 -0
  29. TreeSAK/LcaToLeaves.py +66 -0
  30. TreeSAK/MarkerRef2Tree.py +616 -0
  31. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  32. TreeSAK/MarkerSeq2Tree.py +290 -0
  33. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  34. TreeSAK/ModifyTopo.py +116 -0
  35. TreeSAK/Newick_tree_plotter.py +79 -0
  36. TreeSAK/OMA.py +170 -0
  37. TreeSAK/OMA2.py +212 -0
  38. TreeSAK/OneLineAln.py +50 -0
  39. TreeSAK/PB.py +155 -0
  40. TreeSAK/PMSF.py +106 -0
  41. TreeSAK/PhyloBiAssoc.R +84 -0
  42. TreeSAK/PhyloBiAssoc.py +167 -0
  43. TreeSAK/PlotMCMC.py +41 -0
  44. TreeSAK/PlotMcmcNode.py +152 -0
  45. TreeSAK/PlotMcmcNode_old.py +252 -0
  46. TreeSAK/RootTree.py +101 -0
  47. TreeSAK/RootTreeGTDB214.py +288 -0
  48. TreeSAK/RootTreeGTDB220.py +300 -0
  49. TreeSAK/RootTreeGTDB226.py +300 -0
  50. TreeSAK/SequentialDating.py +16 -0
  51. TreeSAK/SingleAleHGT.py +157 -0
  52. TreeSAK/SingleLinePhy.py +50 -0
  53. TreeSAK/SliceMSA.py +142 -0
  54. TreeSAK/SplitScore.py +19 -0
  55. TreeSAK/SplitScore1.py +178 -0
  56. TreeSAK/SplitScore1OMA.py +148 -0
  57. TreeSAK/SplitScore2.py +597 -0
  58. TreeSAK/TaxaCountStats.R +256 -0
  59. TreeSAK/TaxonTree.py +47 -0
  60. TreeSAK/TreeSAK_config.py +32 -0
  61. TreeSAK/VERSION +158 -0
  62. TreeSAK/VisHPD95.R +45 -0
  63. TreeSAK/VisHPD95.py +200 -0
  64. TreeSAK/__init__.py +0 -0
  65. TreeSAK/ale_parser.py +74 -0
  66. TreeSAK/ale_splitter.py +63 -0
  67. TreeSAK/alignment_pruner.pl +1471 -0
  68. TreeSAK/assessOG.py +45 -0
  69. TreeSAK/catfasta2phy.py +140 -0
  70. TreeSAK/cogTree.py +185 -0
  71. TreeSAK/compare_trees.R +30 -0
  72. TreeSAK/compare_trees.py +255 -0
  73. TreeSAK/dating.py +264 -0
  74. TreeSAK/dating_ss.py +361 -0
  75. TreeSAK/deltall.py +82 -0
  76. TreeSAK/do_rrtc.rb +464 -0
  77. TreeSAK/fa2phy.py +42 -0
  78. TreeSAK/format_leaf_name.py +70 -0
  79. TreeSAK/gap_stats.py +38 -0
  80. TreeSAK/get_SCG_tree.py +742 -0
  81. TreeSAK/get_arCOG_seq.py +97 -0
  82. TreeSAK/global_functions.py +222 -0
  83. TreeSAK/gnm_leaves.py +43 -0
  84. TreeSAK/iTOL.py +791 -0
  85. TreeSAK/iTOL_gene_tree.py +80 -0
  86. TreeSAK/itol_msa_stats.py +56 -0
  87. TreeSAK/keep_highest_rrtc.py +37 -0
  88. TreeSAK/koTree.py +194 -0
  89. TreeSAK/label_tree.R +75 -0
  90. TreeSAK/label_tree.py +121 -0
  91. TreeSAK/mad.py +708 -0
  92. TreeSAK/mcmc2tree.py +58 -0
  93. TreeSAK/mcmcTC copy.py +92 -0
  94. TreeSAK/mcmcTC.py +104 -0
  95. TreeSAK/mcmctree_vs_reltime.R +44 -0
  96. TreeSAK/mcmctree_vs_reltime.py +252 -0
  97. TreeSAK/merge_pdf.py +32 -0
  98. TreeSAK/pRTC.py +56 -0
  99. TreeSAK/parse_mcmctree.py +198 -0
  100. TreeSAK/parse_reltime.py +141 -0
  101. TreeSAK/phy2fa.py +37 -0
  102. TreeSAK/plot_distruibution_th.py +165 -0
  103. TreeSAK/prep_mcmctree_ctl.py +92 -0
  104. TreeSAK/print_leaves.py +32 -0
  105. TreeSAK/pruneMSA.py +63 -0
  106. TreeSAK/recode.py +73 -0
  107. TreeSAK/remove_bias.R +112 -0
  108. TreeSAK/rename_leaves.py +77 -0
  109. TreeSAK/replace_clade.py +55 -0
  110. TreeSAK/root_with_out_group.py +84 -0
  111. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  112. TreeSAK/subsample_drep_gnms.py +74 -0
  113. TreeSAK/subset.py +69 -0
  114. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  115. TreeSAK/supertree.py +330 -0
  116. TreeSAK/tmp_1.py +19 -0
  117. TreeSAK/tmp_2.py +19 -0
  118. TreeSAK/tmp_3.py +120 -0
  119. TreeSAK/weighted_rand.rb +23 -0
  120. treesak-1.51.2.data/scripts/TreeSAK +950 -0
  121. treesak-1.51.2.dist-info/LICENSE +674 -0
  122. treesak-1.51.2.dist-info/METADATA +27 -0
  123. treesak-1.51.2.dist-info/RECORD +125 -0
  124. treesak-1.51.2.dist-info/WHEEL +5 -0
  125. treesak-1.51.2.dist-info/top_level.txt +1 -0
TreeSAK/ALE_backup.py ADDED
@@ -0,0 +1,1081 @@
1
+ import os
2
+ import glob
3
+ import math
4
+ import random
5
+ import argparse
6
+ import seaborn as sns
7
+ from Bio import SeqIO
8
+ from ete3 import Tree
9
+ from PIL import Image
10
+ from itolapi import Itol
11
+ import multiprocessing as mp
12
+ from PyPDF3.pdf import PageObject
13
+ from PyPDF3 import PdfFileWriter, PdfFileReader
14
+ from ete3 import TextFace, TreeStyle, NodeStyle
15
+
16
+
17
+ ALE_usage = '''
18
+ ========================= ALE example commands =========================
19
+
20
+ TreeSAK ALE1 -> step 1: get gene tree
21
+ TreeSAK ALE2 -> step 2: run ALE
22
+ TreeSAK ALE3 -> step 3: parse ALE output
23
+ TreeSAK ALE4 -> Infer ancestral genome
24
+
25
+ cd /Users/songweizhi/Desktop/demo
26
+ TreeSAK ALE1 -i OrthologousGroups.txt -s combined_d__Archaea_o_rs.faa -p oma -c genome_taxon.txt -m 50 -n 2 -t 6 -jt 3 -f -o ALE1_op_dir
27
+ TreeSAK ALE2 -i ALE1_op_dir -s Marker_set_1_PA_75_C60_PMSF_concatenated_rooted.treefile -c genome_taxon.txt -t 6 -f -o ALE2_op_dir
28
+ TreeSAK ALE3 -i1 ALE1_op_dir -i2 ALE2_op_dir -c genome_taxon.txt -color ar_phylum_color_code.txt -o ALE3_op_dir_0.3 -fc 0.3 -f -api S1kZZuDHc0d5M7J5vLnUNQ
29
+ TreeSAK ALE3 -i1 ALE1_op_dir -i2 ALE2_op_dir -c genome_taxon.txt -color ar_phylum_color_code.txt -o ALE3_op_dir_0.5 -fc 0.5 -f -api S1kZZuDHc0d5M7J5vLnUNQ
30
+ TreeSAK ALE3 -i1 ALE1_op_dir -i2 ALE2_op_dir -c genome_taxon.txt -color ar_phylum_color_code.txt -o ALE3_op_dir_0.8 -fc 0.8 -f -api S1kZZuDHc0d5M7J5vLnUNQ
31
+
32
+
33
+ ALEobserve OMA00003_for_ALE.ufboot
34
+
35
+
36
+ cd /Users/songweizhi/Desktop/demo/subset_gnm_tree_no
37
+ docker run -v $PWD:$PWD -w $PWD gregmich/alesuite_new ALEobserve OMA00003_for_ALE.ufboot
38
+ docker run -v $PWD:$PWD -w $PWD gregmich/alesuite_new ALEml_undated genome_tree_rooted.treefile OMA00003_for_ALE.ufboot.ale
39
+
40
+ cd /Users/songweizhi/Desktop/demo/subset_gnm_tree_yes
41
+ docker run -v $PWD:$PWD -w $PWD gregmich/alesuite_new ALEobserve OMA00003_for_ALE.ufboot
42
+ docker run -v $PWD:$PWD -w $PWD gregmich/alesuite_new ALEml_undated OMA00003_genome_tree_for_ALE.treefile OMA00003_for_ALE.ufboot.ale
43
+
44
+
45
+
46
+ ========================================================================
47
+ '''
48
+
49
+
50
def sep_path_basename_ext(file_in):
    """Split a file path into its directory, base name and extension.

    Returns a 3-tuple ``(directory, basename, extension)``. The directory is
    reported as ``'.'`` when ``file_in`` has no directory component, and the
    extension keeps its leading dot (empty string when there is none).
    """
    directory, filename = os.path.split(file_in)
    stem, extension = os.path.splitext(filename)
    return ('.' if directory == '' else directory), stem, extension
61
+
62
+
63
def select_seq(arg_list):
    """Copy the FASTA records whose ids are listed in an id file.

    ``arg_list`` is a positional bundle (convenient for ``Pool.map``):

    * ``arg_list[0]`` - input FASTA file
    * ``arg_list[1]`` - text file with one sequence id per line
    * ``arg_list[2]`` - output FASTA file (written in 'fasta-2line' format)

    Both the id file and the output file are opened with context managers so
    the handles are released even if parsing or writing raises.
    """
    seq_file = arg_list[0]
    id_file = arg_list[1]
    output_file = arg_list[2]

    with open(id_file) as id_file_handle:
        seq_id_set = {each_id.strip() for each_id in id_file_handle}

    with open(output_file, 'w') as output_file_handle:
        for seq_record in SeqIO.parse(seq_file, 'fasta'):
            if seq_record.id in seq_id_set:
                SeqIO.write(seq_record, output_file_handle, 'fasta-2line')
75
+
76
+
77
def root_with_out_group(tree_file, out_group_txt, tree_file_rooted):
    """Root a tree on the common ancestor of the listed outgroup leaves.

    ``tree_file`` is read with ete3 ``format=1``; ``out_group_txt`` holds one
    leaf name per line; the rooted tree is written to ``tree_file_rooted``.

    The outgroup file is closed deterministically, and blank lines in it are
    ignored so that an empty string never ends up in the outgroup set.
    """
    with open(out_group_txt) as out_group_handle:
        out_group_set = {each_og.strip() for each_og in out_group_handle if each_og.strip()}

    tre = Tree(tree_file, format=1)
    out_group_lca = tre.get_common_ancestor(out_group_set)
    tre.set_outgroup(out_group_lca)
    tre.write(outfile=tree_file_rooted)
87
+
88
+
89
def subset_tree(tree_file_in, leaves_to_keep_list, tree_file_out):
    """Prune a tree down to the given leaves, preserving branch lengths.

    ``tree_file_in`` is passed straight to ete3 ``Tree``. When
    ``tree_file_out`` is ``None`` the pruned tree is returned as a newick
    string; otherwise it is written to that file and ``None`` is returned.
    """
    pruned = Tree(tree_file_in).copy()
    pruned.prune(leaves_to_keep_list, preserve_branch_length=True)

    if tree_file_out is not None:
        pruned.write(outfile=tree_file_out)
        return None

    return pruned.write()
98
+
99
+
100
def plot_tree(input_tree, tree_title, node_label_dict, node_label_color_dict, align_leaf_label, show_scale, output_plot):
    """Render a tree file to an image with customised leaf labels.

    Leaf text comes from ``node_label_dict`` (falling back to the leaf name)
    and its colour from ``node_label_color_dict`` (falling back to black).
    Labels are placed 'aligned' when ``align_leaf_label`` is ``True``,
    otherwise 'branch-right'. The plot is written to ``output_plot`` at a
    width of 1200 px. The process exits if ``input_tree`` is not a file.
    """
    if os.path.isfile(input_tree) is False:
        print('Tree file not found, program exited!')
        print(input_tree)
        exit()

    tree = Tree(input_tree)

    style = TreeStyle()
    style.mode = "r"                # 'r' for rectangular, 'c' for circular
    style.show_border = False
    style.show_leaf_name = False    # leaf names are drawn as custom faces below
    title_face = TextFace(tree_title, fsize=9, fgcolor='black', ftype='Arial', tight_text=False)
    style.title.add_face(title_face, column=0)

    # the label position is the same for every leaf, so decide it once
    label_position = 'aligned' if align_leaf_label is True else 'branch-right'

    for node in tree.traverse():
        node_style = NodeStyle()
        node_style["shape"] = "circle"
        node_style["fgcolor"] = "black"     # colour of the node shape, not of the label
        node_style['size'] = 0              # hide the node shape
        node_style['hz_line_type'] = 0      # 0 solid, 1 dashed, 2 dotted
        node_style['vt_line_type'] = 0
        node_style['hz_line_width'] = 0.5
        node_style['vt_line_width'] = 0.5

        if node.is_leaf():
            leaf_id = node.name
            label_color = node_label_color_dict.get(leaf_id, 'black')
            label_text = node_label_dict.get(leaf_id, leaf_id)
            label_face = TextFace(label_text, fsize=8, fgcolor=label_color, tight_text=False, bold=False)
            node.add_face(label_face, column=0, position=label_position)

        node.set_style(node_style)

    # layout
    style.rotation = 0
    style.margin_top = 10
    style.margin_bottom = 10
    style.margin_left = 10
    style.margin_right = 10
    style.branch_vertical_margin = 3    # pixels between adjacent branches
    style.show_scale = show_scale
    style.show_border = False

    tree.render(output_plot, w=1200, units="px", tree_style=style)
151
+
152
+
153
def merge_image(image_file_list, output_image):
    """Paste the given images side by side on a white canvas and save it.

    The canvas is as wide as the sum of the image widths and as tall as the
    tallest image; images are placed left to right, top-aligned.
    """
    panels = [Image.open(image_path) for image_path in image_file_list]
    panel_sizes = [panel.size for panel in panels]
    widths, heights = zip(*panel_sizes)

    canvas = Image.new('RGB', (sum(widths), max(heights)), color='white')

    left_edge = 0
    for panel in panels:
        canvas.paste(panel, (left_edge, 0))
        left_edge += panel.size[0]

    canvas.save(output_image)
167
+
168
+
169
def merge_pdf(pdf_1, pdf_2, margin_size, op_pdf):
    """Place the first pages of two PDFs side by side on one new page.

    ``margin_size`` is added around and between the two pages: the new page
    is three margins wider than the two pages together and two margins
    taller than the taller page. Page 1 is aligned to the top margin, page 2
    to the bottom margin. The result is written to ``op_pdf``.

    All three files are handled with context managers. The inputs stay open
    until the output has been written, because the page objects are still
    backed by the input streams at that point.
    """
    with open(pdf_1, "rb") as pdf_1_handle, open(pdf_2, "rb") as pdf_2_handle:
        page1 = PdfFileReader(pdf_1_handle, strict=False).getPage(0)
        page2 = PdfFileReader(pdf_2_handle, strict=False).getPage(0)

        total_width = page1.mediaBox.upperRight[0] + page2.mediaBox.upperRight[0] + margin_size*3
        total_height = max([page1.mediaBox.upperRight[1], page2.mediaBox.upperRight[1]]) + margin_size*2

        new_page = PageObject.createBlankPage(None, total_width, total_height)
        new_page.mergeTranslatedPage(page1, margin_size, (total_height-margin_size-page1.mediaBox.upperRight[1]))
        new_page.mergeTranslatedPage(page2, (page1.mediaBox.upperRight[0] + margin_size*2), margin_size)

        output = PdfFileWriter()
        output.addPage(new_page)
        with open(op_pdf, "wb") as op_pdf_handle:
            output.write(op_pdf_handle)
184
+
185
+
186
+ def uts_to_itol_connections(genome_tree_file, ale_formatted_gnm_tree, interal_node_prefix, uts_file, freq_cutoff, ignore_leaf_hgt, ignore_vertical_hgt, donor_node_min_leaf_num, recipient_node_min_leaf_num, itol_connection_txt, dr_separator):
+ # Convert the tab-separated transfer records in uts_file (donor, recipient, frequency;
+ # lines starting with '#' are skipped) into iTOL DATASET_CONNECTION annotation files.
+ #
+ # Node ids that are not leaves of genome_tree_file get interal_node_prefix prepended.
+ # Records are filtered by freq_cutoff and, depending on ignore_leaf_hgt /
+ # ignore_vertical_hgt, by the minimum leaf counts of the donor/recipient nodes and by
+ # whether one node is an ancestor/descendant of the other in ale_formatted_gnm_tree.
+ #
+ # Writes itol_connection_txt (all kept connections) plus one extra file per kept
+ # connection, named <basename>_<donor><dr_separator><recipient>.txt in the same folder.
+ #
+ # Returns (internal_node_to_leaf_dict, paired_donor_to_recipient_leaf_dict, hgt_freq_dict);
+ # the latter two are keyed by '<donor><dr_separator><recipient>'.
+ #
+ # NOTE(review): indentation is not visible in this rendering, so the exact nesting of the
+ # filter branches below (in particular where paired_donor_to_recipient_leaf_dict is
+ # filled) must be confirmed against the original file before changing this logic.
+ # NOTE(review): qualified_hgt_num is incremented but is not part of the return value.
187
+
188
+ # get internal_node_to_leaf_dict
189
+ internal_node_to_leaf_dict = get_node_to_leaf_dict(ale_formatted_gnm_tree)
190
+
191
+ paired_donor_to_recipient_leaf_dict = dict()
192
+ qualified_hgt_num = 0
193
+
194
+ # despite its name, leaf_id_set is a list; it stays empty when genome_tree_file is missing
195
+ leaf_id_set = []
196
+ if os.path.isfile(genome_tree_file):
197
+ leaf_id_set = [i.name for i in Tree(genome_tree_file, format=3).get_leaves()]
198
+ else:
199
+ print('%s not found!' % genome_tree_file)
200
+
201
+ hgt_freq_dict = dict()
202
+ connection_line_to_write_dict = dict()
203
+ with open(itol_connection_txt, 'w') as itol_connection_txt_handle:
204
+ # iTOL connection dataset header
205
+ itol_connection_txt_handle.write('DATASET_CONNECTION\nSEPARATOR TAB\nDATASET_LABEL\tdemo_connections\n')
206
+ itol_connection_txt_handle.write('COLOR\t#ff0ff0\nDRAW_ARROWS\t1\nARROW_SIZE\t60\nLOOP_SIZE\t100\n')
207
+ itol_connection_txt_handle.write('MAXIMUM_LINE_WIDTH\t10\nCURVE_ANGLE\t45\nCENTER_CURVES\t1\nALIGN_TO_LABELS\t0\nDATA\n')
208
+ for each_line in open(uts_file):
209
+ if not each_line.startswith('#'):
210
+ each_line_split = each_line.strip().split('\t')
211
+ donor = each_line_split[0]
212
+ recipient = each_line_split[1]
213
+ freq = float(each_line_split[2])
214
+
215
+ # add prefix to internal donor node
216
+ if donor in leaf_id_set:
217
+ donor_with_prefix = donor
218
+ else:
219
+ donor_with_prefix = interal_node_prefix + donor
220
+
221
+ # add prefix to internal recipient node
222
+ if recipient in leaf_id_set:
223
+ recipient_with_prefix = recipient
224
+ else:
225
+ recipient_with_prefix = interal_node_prefix + recipient
226
+
227
+ key_str = '%s%s%s' % (donor_with_prefix, dr_separator, recipient_with_prefix)
228
+
229
+ # line_to_write stays empty unless the record passes the filters below
230
+ line_to_write = ''
231
+ if freq >= freq_cutoff:
232
+ if ignore_leaf_hgt is False:
233
+ if ignore_vertical_hgt is False:
234
+ line_to_write = '%s\t%s\t%s\t%s\t%s\t%s->%s(%s)\n' % (donor_with_prefix, recipient_with_prefix, freq, '#EB984E', 'normal', donor_with_prefix, recipient_with_prefix, freq)
235
+ qualified_hgt_num += 1
236
+ else:
237
+ # each of these two helpers re-reads ale_formatted_gnm_tree on every call
238
+ donor_is_ancestor_of_recipient = check_a_is_ancestor_of_b(ale_formatted_gnm_tree, donor, recipient)
239
+ donor_is_child_of_recipient = check_a_is_child_of_b(ale_formatted_gnm_tree, donor, recipient)
240
+ if (donor_is_ancestor_of_recipient is False) and (donor_is_child_of_recipient is False):
241
+ line_to_write = '%s\t%s\t%s\t%s\t%s\t%s->%s(%s)\n' % (donor_with_prefix, recipient_with_prefix, freq, '#EB984E', 'normal', donor_with_prefix, recipient_with_prefix, freq)
242
+ qualified_hgt_num += 1
243
+ else:
244
+ # only records whose donor and recipient are both non-leaf ids are considered here
245
+ if (each_line_split[0] not in leaf_id_set) and (each_line_split[1] not in leaf_id_set):
246
+ donor_node_leaf_num = len(internal_node_to_leaf_dict.get(donor, []))
247
+ recipient_node_leaf_num = len(internal_node_to_leaf_dict.get(recipient, []))
248
+ if (donor_node_leaf_num >= donor_node_min_leaf_num) and (recipient_node_leaf_num >= recipient_node_min_leaf_num):
249
+ if ignore_vertical_hgt is False:
250
+ line_to_write = '%s\t%s\t%s\t%s\t%s\t%s->%s(%s)\n' % (donor_with_prefix, recipient_with_prefix, freq, '#EB984E', 'normal', donor_with_prefix, recipient_with_prefix, freq)
251
+ qualified_hgt_num += 1
252
+ else:
253
+ donor_is_ancestor_of_recipient = check_a_is_ancestor_of_b(ale_formatted_gnm_tree, donor, recipient)
254
+ donor_is_child_of_recipient = check_a_is_child_of_b(ale_formatted_gnm_tree, donor, recipient)
255
+ if (donor_is_ancestor_of_recipient is False) and (donor_is_child_of_recipient is False):
256
+ line_to_write = '%s\t%s\t%s\t%s\t%s\t%s->%s(%s)\n' % (donor_with_prefix, recipient_with_prefix, freq, '#EB984E', 'normal', donor_with_prefix, recipient_with_prefix, freq)
257
+ qualified_hgt_num += 1
258
+ paired_donor_to_recipient_leaf_dict[key_str] = [internal_node_to_leaf_dict.get(donor, []), internal_node_to_leaf_dict.get(recipient, [])]
259
+
260
+ if line_to_write != '':
261
+ itol_connection_txt_handle.write(line_to_write)
262
+ connection_line_to_write_dict[key_str] = line_to_write
263
+ hgt_freq_dict[key_str] = freq
264
+
265
+ combined_connection_file_path, combined_connection_file_basename, combined_connection_file_ext = sep_path_basename_ext(itol_connection_txt)
266
+
267
+ # write out connections separately
268
+ for each_connection in connection_line_to_write_dict:
269
+ pwd_connection_txt = '%s/%s_%s.txt' % (combined_connection_file_path, combined_connection_file_basename, each_connection)
270
+ pwd_connection_txt_handle = open(pwd_connection_txt, 'w')
271
+ pwd_connection_txt_handle.write('DATASET_CONNECTION\nSEPARATOR TAB\nDATASET_LABEL\tdemo_connections\n')
272
+ pwd_connection_txt_handle.write('COLOR\t#ff0ff0\nDRAW_ARROWS\t1\nARROW_SIZE\t60\nLOOP_SIZE\t100\n')
273
+ pwd_connection_txt_handle.write('MAXIMUM_LINE_WIDTH\t10\nCURVE_ANGLE\t45\nCENTER_CURVES\t1\nALIGN_TO_LABELS\t0\nDATA\n')
274
+ # the stored line already ends with '\n', so this appends a second newline
275
+ pwd_connection_txt_handle.write(connection_line_to_write_dict[each_connection] + '\n')
276
+ pwd_connection_txt_handle.close()
277
+
278
+ return internal_node_to_leaf_dict, paired_donor_to_recipient_leaf_dict, hgt_freq_dict
273
+
274
+
275
def itol_tree(tree_file, annotation_file_list, project_name, APIkey, display_mode, op_plot):
    """Upload a tree and its annotation files to iTOL and export a plot.

    The export format is taken from the extension of ``op_plot``. All
    uploaded annotation datasets are made visible in the export.

    See https://github.com/albertyw/itolapi and
    http://itol.embl.de/help.cgi#batch for the upload/export parameters.
    """
    export_format = op_plot.split('.')[-1]

    # upload the tree followed by every annotation file
    uploader = Itol()
    uploader.params['projectName'] = project_name
    uploader.params['APIkey'] = APIkey
    uploader.params['treeName'] = tree_file
    uploader.add_file(tree_file)
    for annotation_file in annotation_file_list:
        uploader.add_file(annotation_file)

    status = uploader.upload()
    assert status != False

    # dataset indices 0..n-1, comma separated (empty string when there are none)
    datasets_visible_str = ','.join(str(index) for index in range(len(annotation_file_list)))

    # optional export parameters, see https://itol.embl.de/help.cgi#batchExp
    export_settings = [
        ('datasets_visible', datasets_visible_str),
        ('display_mode', display_mode),
        ('range_mode', '2'),
        ('dashed_lines', '1'),
        ('line_width', '3'),
        ('vertical_shift_factor', '0.9'),
        ('horizontal_scale_factor', '0.9'),
        ('format', export_format),
    ]
    exporter = uploader.get_itol_export()
    for param_name, param_value in export_settings:
        exporter.set_export_param_value(param_name, param_value)
    exporter.export(op_plot)
317
+
318
+
319
def get_node_to_leaf_dict(tree_file):
    """Map the name of every non-leaf node to the list of leaf names below it.

    The tree is read with ete3 ``format=1``. Nodes sharing a name overwrite
    each other; the one visited last by ``traverse()`` wins.
    """
    tree = Tree(tree_file, format=1)
    return {
        inner_node.name: inner_node.get_leaf_names()
        for inner_node in tree.traverse()
        if not inner_node.is_leaf()
    }
327
+
328
+
329
def combine_trees(t1_with_len, t2_with_name, op_tree_with_both):
    """Copy node names from one tree onto the same-topology tree with lengths.

    ``t1_with_len`` (read with ``format=0``) is assumed to carry branch
    lengths and ``t2_with_name`` (read with ``format=1``) internal node names.
    Nodes are matched by their sorted set of leaf names; a ``KeyError`` is
    raised if a node of the first tree has no counterpart in the second.
    The merged tree is written to ``op_tree_with_both`` with ``format=3``.
    """
    length_tree = Tree(t1_with_len, format=0)
    named_tree = Tree(t2_with_name, format=1)

    def leaf_signature(tree_node):
        # nodes covering the same leaves get the same signature
        return '__'.join(sorted(tree_node.get_leaf_names()))

    length_nodes_by_signature = {leaf_signature(n): n for n in length_tree.traverse()}
    named_nodes_by_signature = {leaf_signature(n): n for n in named_tree.traverse()}

    named_node_for = {
        length_node: named_nodes_by_signature[signature]
        for signature, length_node in length_nodes_by_signature.items()
    }

    # the copy is traversed in lockstep with the original to pair up the nodes
    merged_tree = length_tree.copy()
    for merged_node, length_node in zip(merged_tree.traverse(), length_tree.traverse()):
        merged_node.name = named_node_for[length_node].name
    merged_tree.write(outfile=op_tree_with_both, format=3)
356
+
357
+
358
def prefix_internal_nodes(tree_in, prefix_str, tree_out):
    """Prepend ``prefix_str`` to the name of every non-leaf node of a tree.

    The tree is read from ``tree_in`` and written to ``tree_out``, both with
    ete3 ``format=3``. Leaf names are left untouched.
    """
    renamed_tree = Tree(tree_in, format=3).copy()
    for tree_node in renamed_tree.traverse():
        if tree_node.is_leaf():
            continue
        tree_node.name = '%s%s' % (prefix_str, tree_node.name)
    renamed_tree.write(outfile=tree_out, format=3)
366
+
367
+
368
def check_a_is_ancestor_of_b(tree_file, node_a, node_b):
    """Return True if a node named ``node_a`` is an ancestor of ``node_b``.

    The tree is read with ete3 ``format=1`` and nodes are matched by name.
    Returns ``False`` when no node is named ``node_b``.
    """
    tree = Tree(tree_file, format=1)
    return any(
        node_a in [ancestor.name for ancestor in candidate.get_ancestors()]
        for candidate in tree.traverse()
        if candidate.name == node_b
    )
379
+
380
+
381
def check_a_is_child_of_b(tree_file, node_a, node_b):
    """Return True if a node named ``node_a`` lies below ``node_b``.

    Despite the name, any descendant counts, not only direct children. The
    tree is read with ete3 ``format=1`` and nodes are matched by name.
    Returns ``False`` when no node is named ``node_b``.
    """
    tree = Tree(tree_file, format=1)
    return any(
        node_a in [descendant.name for descendant in candidate.get_descendants()]
        for candidate in tree.traverse()
        if candidate.name == node_b
    )
392
+
393
+
394
def root_at_midpoint(tree_in, tree_in_rooted):
    """Root the tree in ``tree_in`` at its midpoint and write it to ``tree_in_rooted``."""
    tree = Tree(tree_in)
    tree.set_outgroup(tree.get_midpoint_outgroup())
    tree.write(outfile=tree_in_rooted)
399
+
400
+
401
def get_color_list(color_num):
    """Return ``color_num`` hex colours picked at random from a palette.

    Up to 8 colours come from a fixed 8-colour palette, up to 16 from a fixed
    16-colour palette, and larger requests from eight seaborn colour families.
    The selection is random, but the returned colours keep palette order.
    """
    if color_num <= 8:
        palette = ['#3787c0', '#39399f', '#ffb939', '#399f39', '#9f399f', '#fb694a', '#9f9f39', '#959595']
    elif color_num <= 16:
        palette = ['#2b7bba', '#89bedc', '#2e2e99', '#8a8acc', '#ffa500', '#ffc55c', '#2e992e', '#8acc8a',
                   '#992e99', '#cc8acc', '#d52221', '#fc8161', '#99992e', '#cccc8a', '#5c5c5c', '#adadad']
    else:
        # two extra shades per family, because the two lightest ones are dropped below
        shades_per_family = math.ceil(color_num / 8) + 2
        families = [
            sns.color_palette('Blues', n_colors=shades_per_family).as_hex(),
            sns.light_palette('navy', n_colors=shades_per_family).as_hex(),
            sns.light_palette('orange', n_colors=shades_per_family).as_hex(),
            sns.light_palette('green', n_colors=shades_per_family).as_hex(),
            sns.light_palette('purple', n_colors=shades_per_family).as_hex(),
            sns.color_palette('Reds', n_colors=shades_per_family).as_hex(),
            sns.light_palette('olive', n_colors=shades_per_family).as_hex(),
            sns.color_palette('Greys', n_colors=shades_per_family).as_hex(),
        ]
        palette = []
        for family in families:
            # skip the first two shades and order the rest in reverse
            palette.extend(family[2:][::-1])

    picked = set(random.sample(palette, color_num))

    # report the picked colours in palette order rather than sampling order
    return [shade for shade in palette if shade in picked]
433
+
434
+
435
def scale_str_to_size_list(scale_str):
    """Turn a dash-separated scale such as ``'0-5-10'`` into relative sizes.

    When the first value is 0 the sizes are ``0``, each middle value divided
    by the last value, then ``1``. Otherwise the sizes are spread evenly from
    ``0.1`` to ``1`` regardless of the middle values themselves.
    """
    scale_values = [float(token) for token in scale_str.split('-')]
    middle_values = scale_values[1:-1]

    if scale_values[0] == 0:
        sizes = [0]
        sizes.extend(value / scale_values[-1] for value in middle_values)
    else:
        step = (1 - 0.1) / (len(scale_values) - 1)
        sizes = [0.1]
        sizes.extend(step * position + 0.1 for position, _ in enumerate(middle_values, start=1))

    sizes.append(1)
    return sizes
458
+
459
+
460
def get_ortho_to_gene_dict(ortho_groups_txt, og_program):
    """Parse an orthogroup table into ``{orthogroup_id: [gene_id, ...]}``.

    Lines starting with ``#`` and blank lines are skipped.

    * ``og_program == 'orthofinder'``: space-separated line; the first field
      is the group id followed by one trailing character (dropped), the
      remaining fields are the gene ids.
    * ``og_program == 'oma'``: tab-separated line; the first field is the
      group id, and each member is reduced to the text after the first ``:``
      of its first space-separated token.

    For any other ``og_program`` value each data line maps the empty id to an
    empty gene list, as before.

    The input file is closed deterministically, and blank lines no longer
    create a spurious ``''`` orthogroup.
    """
    ortho_to_gene_dict = dict()
    with open(ortho_groups_txt) as ortho_groups_handle:
        for each_og in ortho_groups_handle:
            og_record = each_og.strip()
            if each_og.startswith('#') or not og_record:
                continue

            og_id = ''
            gene_list = []
            if og_program == 'orthofinder':
                each_og_split = og_record.split(' ')
                og_id = each_og_split[0][:-1]
                gene_list = each_og_split[1:]
            elif og_program == 'oma':
                each_og_split = og_record.split('\t')
                og_id = each_og_split[0]
                gene_list = [each_protein.split(' ')[0].split(':')[1] for each_protein in each_og_split[1:]]
            ortho_to_gene_dict[og_id] = gene_list

    return ortho_to_gene_dict
481
+
482
+
483
def prepare_ale_ip_worker(arg_list):
    """Prepare the per-orthogroup genome tree and bootstrap trees for ALE.

    ``arg_list`` is a positional bundle:

    * ``[0]`` orthogroup id (prefix of the gene tree file names)
    * ``[1]`` folder holding ``<og>.treefile`` and ``<og>.ufboot``
    * ``[2]`` ALE working folder that receives the ALE input files
    * ``[3]`` rooted genome tree file
    * ``[4]`` dict mapping genome id to its taxon label
    * ``[5]`` file name of the ufboot file to write for ALE
    * ``[6]`` file name of the genome tree subset to write for ALE

    The genome id of a gene is its id without the last ``_``-separated field.
    The genome tree is pruned to the genomes present in both trees, and the
    gene tree and every bootstrap tree are pruned to the genes of those
    genomes. In the files written for ALE, ``GCA_``/``GCF_`` are rewritten to
    ``GCA``/``GCF``.

    The ufboot input and output files are handled with context managers so
    they are closed even when pruning one of the trees raises.
    """
    qualified_og = arg_list[0]
    gene_tree_dir = arg_list[1]
    ale_wd = arg_list[2]
    genome_tree_file_rooted = arg_list[3]
    gnm_pco_dict = arg_list[4]
    gene_tree_ufboot_for_ale = arg_list[5]
    genome_tree_file_subset_for_ale = arg_list[6]

    pwd_genome_tree_file_subset = '%s/%s_genome_tree.treefile' % (gene_tree_dir, qualified_og)
    pwd_genome_tree_file_subset_for_ale = '%s/%s' % (ale_wd, genome_tree_file_subset_for_ale)
    pwd_gene_tree_ufboot = '%s/%s.ufboot' % (gene_tree_dir, qualified_og)
    pwd_gene_tree_ufboot_for_ale = '%s/%s' % (ale_wd, gene_tree_ufboot_for_ale)
    pwd_gene_tree_treefile = '%s/%s.treefile' % (gene_tree_dir, qualified_og)
    pwd_gene_tree_treefile_subset = '%s/%s_subset.treefile' % (gene_tree_dir, qualified_og)

    # group the genes on the gene tree by genome
    gnm_to_gene_dict = dict()
    for each_gene in Tree(pwd_gene_tree_treefile).get_leaf_names():
        gene_gnm = '_'.join(each_gene.split('_')[:-1])
        gnm_to_gene_dict.setdefault(gene_gnm, set()).add(each_gene)

    # subset genome tree to the genomes found in both trees
    genome_tree_leaf_set = set(Tree(genome_tree_file_rooted).get_leaf_names())
    gnms_in_both_trees = genome_tree_leaf_set.intersection(gnm_to_gene_dict)
    gnm_tree_subset_str = subset_tree(genome_tree_file_rooted, gnms_in_both_trees, None)
    gnm_tree_subset_str_for_ale = gnm_tree_subset_str.replace('GCA_', 'GCA').replace('GCF_', 'GCF')

    # write out genome tree subset
    with open(pwd_genome_tree_file_subset, 'w') as pwd_genome_tree_file_subset_handle:
        pwd_genome_tree_file_subset_handle.write(gnm_tree_subset_str)

    # write out genome tree subset for running ALE
    with open(pwd_genome_tree_file_subset_for_ale, 'w') as pwd_genome_tree_file_subset_for_ale_handle:
        pwd_genome_tree_file_subset_for_ale_handle.write(gnm_tree_subset_str_for_ale)

    # get genes to keep in gene tree
    gene_set_to_keep = set()
    for each_gnm in gnms_in_both_trees:
        gene_set_to_keep.update(gnm_to_gene_dict.get(each_gnm, set()))

    # subset gene_tree.treefile
    subset_tree(pwd_gene_tree_treefile, gene_set_to_keep, pwd_gene_tree_treefile_subset)

    # subset gene_tree.ufboot (one newick tree per line) and rename leaves for running ALE
    with open(pwd_gene_tree_ufboot_for_ale, 'w') as pwd_gene_tree_ufboot_for_ale_handle:
        with open(pwd_gene_tree_ufboot) as pwd_gene_tree_ufboot_handle:
            for each_gene_tree in pwd_gene_tree_ufboot_handle:
                gene_tree_str = each_gene_tree.strip()
                gene_tree_str_subset_for_ale = subset_tree(gene_tree_str, gene_set_to_keep, None)
                gene_tree_str_subset_for_ale = gene_tree_str_subset_for_ale.replace('GCA_', 'GCA').replace('GCF_', 'GCF')
                pwd_gene_tree_ufboot_for_ale_handle.write(gene_tree_str_subset_for_ale + '\n')

    # get gene tree leaf name dict (for plot)
    # NOTE(review): leaf_name_dict is not used or returned in the visible code; the loop is
    # kept because it still raises KeyError for a genome missing from gnm_pco_dict.
    leaf_name_dict = dict()
    for gene_id in Tree(pwd_gene_tree_treefile_subset).get_leaf_names():
        gene_id_split = gene_id.split('_')
        genome_pco = gnm_pco_dict['_'.join(gene_id_split[:-1])]
        leaf_name_dict[gene_id] = '%s_%s' % (genome_pco, gene_id_split[-1])
558
+
559
+
560
+ def ALE1(args):
561
+
562
+ orthogroups_op_txt = args['i']
563
+ combined_faa = args['s']
564
+ og_program = args['p']
565
+ genome_taxon_txt = args['c']
566
+ min_og_genome_num = args['m']
567
+ min_og_phylum_num = args['n']
568
+ num_threads = args['t']
569
+ js_num_threads = args['jt']
570
+ force_create_op_dir = args['f']
571
+ op_dir = args['o']
572
+ designate_ogs = []
573
+ to_ignore_ogs_list = []
574
+
575
+ # define output file name
576
+ get_gene_tree_cmds_txt = '%s_cmds.txt' % op_dir
577
+
578
+ if os.path.isdir(op_dir) is True:
579
+ if force_create_op_dir is True:
580
+ os.system('rm -r %s' % op_dir)
581
+ else:
582
+ print('Output folder detected, program exited!')
583
+ exit()
584
+
585
+ if force_create_op_dir is True:
586
+ if os.path.isdir(op_dir) is True:
587
+ os.system('rm -r %s' % op_dir)
588
+ os.system('mkdir %s' % op_dir)
589
+
590
+ # read in genome taxonomy
591
+ gnm_p_dict = dict()
592
+ gnm_c_dict = dict()
593
+ gnm_o_dict = dict()
594
+ gnm_pco_dict = dict()
595
+ for each_gnm in open(genome_taxon_txt):
596
+ each_gnm_split = each_gnm.strip().split('\t')
597
+ gnm_id = each_gnm_split[0]
598
+ taxon_str = each_gnm_split[1]
599
+ gnm_phylum = taxon_str.split(';')[1]
600
+ gnm_class = taxon_str.split(';')[2]
601
+ gnm_order = taxon_str.split(';')[3]
602
+ gnm_p_dict[gnm_id] = gnm_phylum
603
+ gnm_c_dict[gnm_id] = gnm_class
604
+ gnm_o_dict[gnm_id] = gnm_order
605
+ gnm_pco_dict[gnm_id] = '%s__%s__%s__%s' % (gnm_phylum[3:], gnm_class[3:], gnm_order[3:], gnm_id)
606
+
607
+ # get ortho_to_gene_dict
608
+ ortho_to_gene_dict = get_ortho_to_gene_dict(orthogroups_op_txt, og_program)
609
+
610
+ # get qualified orthogroups
611
+ qualified_og_set = set()
612
+ for each_ortho in ortho_to_gene_dict:
613
+ ortho_gene_set = ortho_to_gene_dict[each_ortho]
614
+ ortho_p_set = set()
615
+ ortho_gnm_set = set()
616
+ for each_gene in ortho_gene_set:
617
+ gene_gnm = '_'.join(each_gene.split('_')[:-1])
618
+ gnm_taxon = gnm_p_dict[gene_gnm]
619
+ ortho_gnm_set.add(gene_gnm)
620
+ ortho_p_set.add(gnm_taxon)
621
+ if (len(ortho_gnm_set) >= min_og_genome_num) and (len(ortho_p_set) >= min_og_phylum_num):
622
+ qualified_og_set.add(each_ortho)
623
+ print('The total number of identified orthogroups is %s.' % len(ortho_to_gene_dict))
624
+ print('The number of orthogroups spanning >= %s genomes and >= %s phyla is %s.' % (min_og_genome_num, min_og_phylum_num, len(qualified_og_set)))
625
+
626
+ # process qualified OG
627
+ og_to_process = sorted([i for i in qualified_og_set])
628
+ if len(designate_ogs) > 0:
629
+ print('The number of designated OGs to process: %s' % len(designate_ogs))
630
+ og_to_process = designate_ogs
631
+
632
+ og_to_process_no_ignored = set()
633
+ for each_og in og_to_process:
634
+ if each_og not in to_ignore_ogs_list:
635
+ og_to_process_no_ignored.add(each_og)
636
+
637
+ # extract gene sequences and prepare commands for building gene tree
638
+ print('Preparing commands for building gene trees')
639
+ extract_seq_arg_lol = []
640
+ prepare_ale_ip_worker_arg_lol = []
641
+ get_gene_tree_cmds_txt_handle = open(get_gene_tree_cmds_txt, 'w')
642
+ for qualified_og in sorted(og_to_process_no_ignored):
643
+ qualified_og_gene_set = ortho_to_gene_dict[qualified_og]
644
+ qualified_og_gene_txt = '%s/%s.txt' % (op_dir, qualified_og)
645
+ qualified_og_gene_faa = '%s/%s.faa' % (op_dir, qualified_og)
646
+ qualified_og_gene_aln = '%s/%s.aln' % (op_dir, qualified_og)
647
+ qualified_og_gene_aln_trimmed = '%s/%s_trimmed.aln' % (op_dir, qualified_og)
648
+ pwd_gene_tree_ufboot = '%s/%s.ufboot' % (op_dir, qualified_og)
649
+
650
+ # write out the id of genes
651
+ with open(qualified_og_gene_txt, 'w') as qualified_og_gene_txt_handle:
652
+ qualified_og_gene_txt_handle.write('\n'.join(qualified_og_gene_set))
653
+
654
+ # add to mp lol
655
+ extract_seq_arg_lol.append([combined_faa, qualified_og_gene_txt, qualified_og_gene_faa])
656
+
657
+ # write out js for mafft, trimal and iqtree
658
+ mafft_cmd = 'mafft-einsi --thread %s --quiet %s.faa > %s.aln' % (js_num_threads, qualified_og, qualified_og)
659
+ trimal_cmd = 'trimal -in %s.aln -out %s -automated1' % (qualified_og, qualified_og_gene_aln_trimmed)
660
+ iqtree_cmd = 'iqtree -m LG+G+I -bb 1000 --wbtl -nt %s -s %s.aln -pre %s' % (js_num_threads, qualified_og, qualified_og)
661
+ iqtree_cmd_trimmed = 'iqtree -m LG+G+I -bb 1000 --wbtl -nt %s -s %s -pre %s_trimmed' % (js_num_threads, qualified_og_gene_aln_trimmed, qualified_og)
662
+ get_gene_tree_cmds_txt_handle.write('%s; %s\n' % (mafft_cmd, iqtree_cmd))
663
+ get_gene_tree_cmds_txt_handle.close()
664
+
665
+ # extract gene sequences with multiprocessing
666
+ print('Extracting gene sequences with %s cores' % num_threads)
667
+ pool = mp.Pool(processes=num_threads)
668
+ pool.map(select_seq, extract_seq_arg_lol)
669
+ pool.close()
670
+ pool.join()
671
+
672
+
673
def ALE2(args):
    """Prepare per-orthogroup ALE inputs and write the ALE command list.

    Every ``*.ufboot`` file found in the gene tree directory names one
    orthogroup (OG). For each OG one line is written to
    ``<ale_wd>_cmds.txt`` (``ALEobserve ...; ALEml_undated ...``), and
    ``prepare_ale_ip_worker`` is then run on all OGs through a
    multiprocessing pool.

    Args:
        args: dict with the keys
            'i': directory holding the gene tree ``.ufboot`` files,
            's': rooted genome tree file,
            'c': tab-separated genome taxonomy file; column 1 is the genome
                 id, column 2 a ';'-separated taxon string,
            'f': True to delete an already existing working directory,
            't': number of worker processes,
            'o': ALE working directory to create.
    """
    import shutil  # local import: only needed for the forced clean-up below

    gene_tree_dir = args['i']
    genome_tree_file_rooted = args['s']
    genome_taxon_txt = args['c']
    force_create_ale_wd = args['f']
    num_threads = args['t']
    ale_wd = args['o']
    run_ale_cmds_txt = '%s_cmds.txt' % ale_wd

    # one orthogroup per .ufboot file in the gene tree directory
    og_to_process_list = []
    for each_ufboot in glob.glob('%s/*.ufboot' % gene_tree_dir):
        _, ufboot_base, _ = sep_path_basename_ext(each_ufboot)
        og_to_process_list.append(ufboot_base)

    # read in genome taxonomy; the handle is closed as soon as parsing is done
    gnm_pco_dict = dict()
    with open(genome_taxon_txt) as genome_taxon_handle:
        for each_gnm in genome_taxon_handle:
            each_gnm = each_gnm.strip()
            if not each_gnm:
                continue  # tolerate blank lines instead of raising IndexError
            each_gnm_split = each_gnm.split('\t')
            gnm_id = each_gnm_split[0]
            taxon_ranks = each_gnm_split[1].split(';')
            gnm_phylum = taxon_ranks[1]
            gnm_class = taxon_ranks[2]
            gnm_order = taxon_ranks[3]
            # the first three characters of each rank are dropped
            # (presumably a rank prefix such as 'p__' -- confirm)
            gnm_pco_dict[gnm_id] = '%s__%s__%s__%s' % (gnm_phylum[3:], gnm_class[3:], gnm_order[3:], gnm_id)

    # create ale_wd without going through the shell, so paths containing
    # spaces or shell metacharacters are handled correctly
    if (force_create_ale_wd is True) and (os.path.isdir(ale_wd) is True):
        shutil.rmtree(ale_wd)
    # an existing directory is reused when overwrite was not requested
    os.makedirs(ale_wd, exist_ok=True)

    prepare_ale_ip_worker_arg_lol = []
    with open(run_ale_cmds_txt, 'w') as run_ale_cmds_txt_handle:
        for qualified_og in og_to_process_list:
            pwd_gene_tree_ufboot = '%s/%s.ufboot' % (gene_tree_dir, qualified_og)
            if os.path.isfile(pwd_gene_tree_ufboot) is False:
                print('%s not found, please build gene tree first!' % pwd_gene_tree_ufboot)
                continue

            gene_tree_ufboot_for_ale = '%s_for_ALE.ufboot' % qualified_og
            genome_tree_file_subset_for_ale = '%s_genome_tree_for_ALE.treefile' % qualified_og
            obtain_ale_file_cmd = 'ALEobserve %s' % gene_tree_ufboot_for_ale
            reconciliation_cmd = 'ALEml_undated %s %s_for_ALE.ufboot.ale' % (genome_tree_file_subset_for_ale, qualified_og)
            run_ale_cmds_txt_handle.write('%s; %s\n' % (obtain_ale_file_cmd, reconciliation_cmd))
            prepare_ale_ip_worker_arg_lol.append([qualified_og, gene_tree_dir, ale_wd, genome_tree_file_rooted,
                                                  gnm_pco_dict, gene_tree_ufboot_for_ale,
                                                  genome_tree_file_subset_for_ale])

    # prepare input files and job script for running ALE with multiprocessing
    print('Preparing files for running ALE with %s cores for %s OGs' % (num_threads, len(prepare_ale_ip_worker_arg_lol)))
    pool = mp.Pool(processes=num_threads)
    pool.map(prepare_ale_ip_worker, prepare_ale_ip_worker_arg_lol)
    pool.close()
    pool.join()
732
+
733
+
734
def iTOL(Leaf_to_Group_dict, Group_to_Color_dict, FileOut):
    """Write an iTOL ``DATASET_COLORSTRIP`` annotation file.

    Args:
        Leaf_to_Group_dict: maps each leaf id to the group it belongs to.
        Group_to_Color_dict: maps a group to its color. When it is empty,
            a color for every group is taken from ``get_color_list`` and
            the caller's dict is left untouched. Otherwise colors for the
            groups it lacks are taken from ``get_color_list`` and added to
            this dict IN PLACE, so later calls with the same dict reuse them.
        FileOut: path of the annotation file to write.
    """
    Group_set = set(Leaf_to_Group_dict.values())

    if len(Group_to_Color_dict) == 0:
        Group_to_Color_dict = dict(zip(Group_set, get_color_list(len(Group_set))))
    else:
        group_without_color_list = [g for g in Group_set if g not in Group_to_Color_dict]
        if group_without_color_list:
            color_list_unprovided = get_color_list(len(group_without_color_list))
            Group_to_Color_dict.update(zip(group_without_color_list, color_list_unprovided))

    # the context manager closes the file even if a color lookup fails
    with open(FileOut, 'w') as FileOut_handle:
        FileOut_handle.write('DATASET_COLORSTRIP\n')
        FileOut_handle.write('SEPARATOR TAB\n')
        FileOut_handle.write('DATASET_LABEL\tTaxonomy\n')
        FileOut_handle.write('\n# customize strip attributes here\n')
        FileOut_handle.write('STRIP_WIDTH\t100\n')
        FileOut_handle.write('MARGIN\t20\n')
        FileOut_handle.write('\n# provide data here\nDATA\n')
        for leaf, leaf_group in Leaf_to_Group_dict.items():
            leaf_color = Group_to_Color_dict[leaf_group]
            FileOut_handle.write('%s\t%s\t%s\n' % (leaf, leaf_color, leaf_group))
766
+
767
+
768
def parse_ale_op_worker(arg_list):
    """Parse the ALE output of one orthogroup and plot its predicted HGTs.

    ``arg_list`` is a positional list (a single argument so the function can
    be handed to a pool ``map``):

        0  qualified_og                 orthogroup id
        1  gene_tree_dir                directory with ``<og>.treefile`` / ``<og>_subset.treefile``
        2  ale_wd                       directory holding the ``.uml_rec`` file
        3  ale_op_dir                   directory holding the other ALE outputs
        4  ale_hgt_plot_dir             output directory (must contain ``annotation_files/``)
        5  interal_node_prefix          prefix passed on for internal node names
        6  gnm_pco_dict                 genome id -> 'phylum__class__order__genome' string
        7  d_color                      label color for the donor side
        8  r_color                      label color for the recipient side
        9  project_name                 passed on to ``itol_tree``
        10 API_key                      passed on to ``itol_tree``
        11 display_mode                 passed on to ``itol_tree``
        12 hgt_freq_cutoff              passed on to ``uts_to_itol_connections``
        13 ignore_leaf_hgt              passed on to ``uts_to_itol_connections``
        14 ignore_vertical_hgt          passed on to ``uts_to_itol_connections``
        15 donor_node_min_leaf_num      passed on to ``uts_to_itol_connections``
        16 recipient_node_min_leaf_num  passed on to ``uts_to_itol_connections``
        17 dr_separator                 separator between donor and recipient in a pair key
        18 root_gene_tree_at_midpoint   True to plot a midpoint-rooted gene tree
        19 p_color_txt                  tab-separated file: color, then phylum

    For every donor/recipient pair returned by ``uts_to_itol_connections``
    the genome tree and the gene tree are rendered with ``itol_tree`` and
    merged into one PDF with ``merge_pdf``; annotation files are then moved
    into ``<ale_hgt_plot_dir>/annotation_files/``.
    """

    qualified_og = arg_list[0]
    gene_tree_dir = arg_list[1]
    ale_wd = arg_list[2]
    ale_op_dir = arg_list[3]
    ale_hgt_plot_dir = arg_list[4]
    interal_node_prefix = arg_list[5]
    gnm_pco_dict = arg_list[6]
    d_color = arg_list[7]
    r_color = arg_list[8]
    project_name = arg_list[9]
    API_key = arg_list[10]
    display_mode = arg_list[11]
    hgt_freq_cutoff = arg_list[12]
    ignore_leaf_hgt = arg_list[13]
    ignore_vertical_hgt = arg_list[14]
    donor_node_min_leaf_num = arg_list[15]
    recipient_node_min_leaf_num = arg_list[16]
    dr_separator = arg_list[17]
    root_gene_tree_at_midpoint = arg_list[18]
    p_color_txt = arg_list[19]

    # file names derived from the orthogroup id
    ale_uml_rec_file = '%s/%s_for_ALE.ufboot.ale.uml_rec' % (ale_wd, qualified_og)
    gene_tree_treefile = '%s.treefile' % qualified_og
    genome_tree_file_subset_for_ale = '%s_genome_tree_for_ALE.treefile' % qualified_og
    gene_tree_ufboot_for_ale = '%s_for_ALE.ufboot' % qualified_og
    uts_file = '%s.ale.uTs' % gene_tree_ufboot_for_ale
    uml_rec_file = '%s.ale.uml_rec' % gene_tree_ufboot_for_ale
    ale_formatted_gnm_tree = '%s_ALE_formatted_genome_tree.tree' % gene_tree_ufboot_for_ale
    ale_formatted_gnm_tree_with_len = '%s_ALE_formatted_genome_tree_with_len.tree' % gene_tree_ufboot_for_ale
    ale_formatted_gnm_tree_with_len_prefixed = '%s_ALE_formatted_genome_tree_with_len_prefixed.tree' % gene_tree_ufboot_for_ale
    itol_connection_txt_all = '%s_iTOL_connection.txt' % qualified_og
    itol_label_txt = '%s_iTOL_genome_pco.txt' % qualified_og
    gene_tree_itol_label_txt = '%s_iTOL_gene_pco.txt' % qualified_og
    gene_tree_treefile_subset = '%s_subset.treefile' % qualified_og
    gene_tree_treefile_subset_midpoint_rooted = '%s_subset_midpoint_rooted.treefile' % qualified_og
    gene_tree_itol_colorstrip_txt = '%s_iTOL_colorstrip_gene.txt' % qualified_og
    genome_tree_itol_colorstrip_txt = '%s_iTOL_colorstrip_genome.txt' % qualified_og
    # the same names, prefixed with the directory each file lives in
    pwd_gene_tree_treefile_subset = '%s/%s' % (gene_tree_dir, gene_tree_treefile_subset)
    pwd_gene_tree_treefile_subset_midpoint_rooted = '%s/%s' % (ale_op_dir, gene_tree_treefile_subset_midpoint_rooted)
    pwd_gene_tree_treefile = '%s/%s' % (gene_tree_dir, gene_tree_treefile)
    pwd_genome_tree_file_subset_for_ale = '%s/%s' % (ale_op_dir, genome_tree_file_subset_for_ale)
    pwd_itol_connection_txt_all = '%s/%s' % (ale_hgt_plot_dir, itol_connection_txt_all)
    pwd_itol_label_txt = '%s/%s' % (ale_op_dir, itol_label_txt)
    pwd_gene_tree_itol_label_txt = '%s/%s' % (ale_hgt_plot_dir, gene_tree_itol_label_txt)
    pwd_uts_file = '%s/%s' % (ale_op_dir, uts_file)
    pwd_uml_rec_file = '%s/%s' % (ale_op_dir, uml_rec_file)
    pwd_ale_formatted_gnm_tree = '%s/%s' % (ale_op_dir, ale_formatted_gnm_tree)
    pwd_ale_formatted_gnm_tree_with_len = '%s/%s' % (ale_op_dir, ale_formatted_gnm_tree_with_len)
    pwd_ale_formatted_gnm_tree_with_len_prefixed = '%s/%s' % (ale_op_dir, ale_formatted_gnm_tree_with_len_prefixed)
    pwd_gene_tree_itol_colorstrip_txt = '%s/%s' % (ale_hgt_plot_dir, gene_tree_itol_colorstrip_txt)
    pwd_genome_tree_itol_colorstrip_txt = '%s/%s' % (ale_hgt_plot_dir, genome_tree_itol_colorstrip_txt)

    # run ale_splitter (writes the stree/info/recs/rec_table files next to the uml_rec file)
    ale_splitter(ale_uml_rec_file)

    # read in phylum color: column 1 is the color, column 2 the phylum
    # NOTE(review): the file handle opened here is never closed explicitly.
    p_color_dict = dict()
    for each_line in open(p_color_txt):
        each_line_split = each_line.strip().split('\t')
        phylum_id = each_line_split[1]
        color_id = each_line_split[0]
        p_color_dict[phylum_id] = color_id

    # defaults used when no uTs file is available (the plotting loop below then has nothing to do)
    internal_node_to_leaf_dict = dict()
    paired_donor_to_recipient_leaf_dict = dict()
    hgt_freq_dict = dict()
    if os.path.isfile(pwd_uts_file) is True:

        # write out ALE formatted genome tree: second tab-separated field of the third line of the uml_rec file
        renamed_genome_tree_str = open(pwd_uml_rec_file).readlines()[2].strip().split('\t')[1]
        with open(pwd_ale_formatted_gnm_tree, 'w') as ale_renamed_species_tree_handle:
            ale_renamed_species_tree_handle.write(renamed_genome_tree_str + '\n')

        internal_node_to_leaf_dict, paired_donor_to_recipient_leaf_dict, hgt_freq_dict = uts_to_itol_connections(pwd_genome_tree_file_subset_for_ale, pwd_ale_formatted_gnm_tree, interal_node_prefix, pwd_uts_file, hgt_freq_cutoff, ignore_leaf_hgt, ignore_vertical_hgt, donor_node_min_leaf_num, recipient_node_min_leaf_num, pwd_itol_connection_txt_all, dr_separator)
    else:
        print('%s: uTs file not found, you need to run ALE first!' % qualified_og)

    # NOTE(review): everything below is placed at function level, i.e. it
    # also runs when the uTs file was not found; in that case the ALE
    # formatted genome tree has not been written by this function -- confirm
    # this nesting against the upstream file.

    # combine_trees
    combine_trees(pwd_genome_tree_file_subset_for_ale, pwd_ale_formatted_gnm_tree, pwd_ale_formatted_gnm_tree_with_len)

    # prefix_internal_nodes of combined tree
    prefix_internal_nodes(pwd_ale_formatted_gnm_tree_with_len, interal_node_prefix, pwd_ale_formatted_gnm_tree_with_len_prefixed)

    # write out iTOL label file for gene and genome tree, also colorstrip for taxonomy
    pwd_itol_label_txt_handle = open(pwd_itol_label_txt, 'w')
    pwd_itol_label_txt_handle.write('LABELS\nSEPARATOR TAB\n\nDATA\n')
    pwd_gene_tree_itol_label_txt_handle = open(pwd_gene_tree_itol_label_txt, 'w')
    pwd_gene_tree_itol_label_txt_handle.write('LABELS\nSEPARATOR TAB\n\nDATA\n')
    wrote_gnm_set = set()
    gene_to_p_dict = dict()
    genome_to_p_dict = dict()
    for each_gene in Tree(pwd_gene_tree_treefile).get_leaf_names():
        # the genome id is the gene id without its last '_'-separated field
        gene_gnm = '_'.join(each_gene.split('_')[:-1])
        genome_name_for_ale = gene_gnm
        # genome tree leaves carry 'GCA'/'GCF' without the following underscore
        genome_name_for_ale = genome_name_for_ale.replace('GCA_', 'GCA').replace('GCF_', 'GCF')
        genome_with_taxon = gnm_pco_dict[gene_gnm]
        gene_to_p_dict[each_gene] = genome_with_taxon.split('__')[0]
        # each genome gets exactly one line in the genome label file
        if gene_gnm not in wrote_gnm_set:
            genome_to_p_dict[genome_name_for_ale] = genome_with_taxon.split('__')[0]
            pwd_itol_label_txt_handle.write('%s\t%s\n' % (genome_name_for_ale, genome_with_taxon))
            wrote_gnm_set.add(gene_gnm)
        pwd_gene_tree_itol_label_txt_handle.write('%s\t%s_%s\n' % (each_gene, genome_with_taxon, each_gene.split('_')[-1]))
    pwd_itol_label_txt_handle.close()
    pwd_gene_tree_itol_label_txt_handle.close()

    # phylum color strips; the first call may add colors to p_color_dict, which the second call then reuses
    iTOL(gene_to_p_dict, p_color_dict, pwd_gene_tree_itol_colorstrip_txt)
    iTOL(genome_to_p_dict, p_color_dict, pwd_genome_tree_itol_colorstrip_txt)

    # root gene tree at midpoint
    gene_tree_to_plot = pwd_gene_tree_treefile_subset
    if root_gene_tree_at_midpoint is True:
        root_at_midpoint(pwd_gene_tree_treefile_subset, pwd_gene_tree_treefile_subset_midpoint_rooted)
        gene_tree_to_plot = pwd_gene_tree_treefile_subset_midpoint_rooted

    # plot separately: one combined PDF per donor/recipient pair
    n = 1
    for each_d2r in paired_donor_to_recipient_leaf_dict:
        each_d2r_freq = hgt_freq_dict[each_d2r]
        each_d2r_d_list = paired_donor_to_recipient_leaf_dict[each_d2r][0]
        each_d2r_r_list = paired_donor_to_recipient_leaf_dict[each_d2r][1]
        pwd_itol_label_txt = '%s/%s_iTOL_genome_pco.txt' % (ale_op_dir, qualified_og)
        pwd_gene_tree_itol_label_txt = '%s/%s_iTOL_gene_pco.txt' % (ale_hgt_plot_dir, qualified_og)
        pwd_gnm_tree_label_color_txt = '%s/%s_iTOL_label_color_genome_%s.txt' % (ale_hgt_plot_dir, qualified_og, each_d2r)
        pwd_gene_tree_label_color_txt = '%s/%s_iTOL_label_color_gene_%s.txt' % (ale_hgt_plot_dir, qualified_og, each_d2r)
        pwd_itol_connection_txt = '%s/%s_iTOL_connection_%s.txt' % (ale_hgt_plot_dir, qualified_og, each_d2r)
        pwd_ale_formatted_gnm_tree_with_len_prefixed_pdf = '%s/%s_genome_tree_with_HGT_%s.pdf' % (ale_wd, qualified_og, each_d2r)
        pwd_gene_tree_treefile_subset_pdf = '%s/%s_subset_%s.pdf' % (ale_hgt_plot_dir, qualified_og, each_d2r)
        pwd_gene_tree_treefile_subset_pdf_rooted = '%s/%s_subset_%s_rooted.pdf' % (ale_hgt_plot_dir, qualified_og, each_d2r)
        pwd_combined_image_with_ale_hgts = '%s/%s_HGT_%s_%s_%s.pdf' % (ale_hgt_plot_dir, qualified_og, n, each_d2r, each_d2r_freq)

        # write out gnm_tree_label_color_txt: donor clade in d_color, recipient clade in r_color
        pwd_gnm_tree_label_color_txt_handle = open(pwd_gnm_tree_label_color_txt, 'w')
        pwd_gnm_tree_label_color_txt_handle.write('DATASET_STYLE\nSEPARATOR TAB\nDATASET_LABEL\texample_style\nCOLOR\t#ffff00\n\nDATA\n')
        pwd_gnm_tree_label_color_txt_handle.write('%s\tlabel\tclade\t%s\t1\tnormal\n' % (each_d2r.split(dr_separator)[0], d_color))
        pwd_gnm_tree_label_color_txt_handle.write('%s\tlabel\tclade\t%s\t1\tnormal\n' % (each_d2r.split(dr_separator)[1], r_color))
        pwd_gnm_tree_label_color_txt_handle.close()

        # write out the label color file for the gene tree: genes of donor-side genomes in d_color, recipient-side in r_color
        pwd_gene_tree_label_color_txt_handle = open(pwd_gene_tree_label_color_txt, 'w')
        pwd_gene_tree_label_color_txt_handle.write('DATASET_STYLE\nSEPARATOR TAB\nDATASET_LABEL\texample_style\nCOLOR\t#ffff00\n\nDATA\n')
        for each_gene in Tree(pwd_gene_tree_treefile).get_leaf_names():

            gene_name_for_ale = '_'.join(each_gene.strip().split('_')[:-1])
            gene_name_for_ale = gene_name_for_ale.replace('GCA_', 'GCA').replace('GCF_', 'GCF')
            if gene_name_for_ale in each_d2r_d_list:
                pwd_gene_tree_label_color_txt_handle.write('%s\tlabel\tnode\t%s\t1\tnormal\n' % (each_gene, d_color))
            elif gene_name_for_ale in each_d2r_r_list:
                pwd_gene_tree_label_color_txt_handle.write('%s\tlabel\tnode\t%s\t1\tnormal\n' % (each_gene, r_color))
        pwd_gene_tree_label_color_txt_handle.close()

        # render both trees, then merge the two PDFs into one
        itol_tree(pwd_ale_formatted_gnm_tree_with_len_prefixed, [pwd_gnm_tree_label_color_txt, pwd_itol_label_txt, pwd_itol_connection_txt, pwd_genome_tree_itol_colorstrip_txt], project_name, API_key, display_mode, pwd_ale_formatted_gnm_tree_with_len_prefixed_pdf)
        itol_tree(gene_tree_to_plot, [pwd_gene_tree_itol_label_txt, pwd_gene_tree_label_color_txt, pwd_gene_tree_itol_colorstrip_txt], project_name, API_key, display_mode, pwd_gene_tree_treefile_subset_pdf)
        merge_pdf(pwd_ale_formatted_gnm_tree_with_len_prefixed_pdf, pwd_gene_tree_treefile_subset_pdf, 66, pwd_combined_image_with_ale_hgts)
        n += 1

        # files specific to this donor/recipient pair
        os.system('mv %s %s/annotation_files/' % (pwd_ale_formatted_gnm_tree_with_len_prefixed_pdf, ale_hgt_plot_dir))
        os.system('mv %s %s/annotation_files/' % (pwd_gene_tree_treefile_subset_pdf, ale_hgt_plot_dir))
        os.system('mv %s %s/annotation_files/' % (pwd_gnm_tree_label_color_txt, ale_hgt_plot_dir))
        os.system('mv %s %s/annotation_files/' % (pwd_gene_tree_label_color_txt, ale_hgt_plot_dir))
        os.system('mv %s %s/annotation_files/' % (pwd_itol_connection_txt, ale_hgt_plot_dir))
    # NOTE(review): the five files below are shared by every iteration of the
    # loop above (they are passed to itol_tree each time), so they are moved
    # once, after the loop -- confirm this nesting against the upstream file.
    os.system('mv %s %s/annotation_files/' % (pwd_itol_label_txt, ale_hgt_plot_dir))
    os.system('mv %s %s/annotation_files/' % (pwd_gene_tree_itol_label_txt, ale_hgt_plot_dir))
    os.system('mv %s %s/annotation_files/' % (pwd_itol_connection_txt_all, ale_hgt_plot_dir))
    os.system('mv %s %s/annotation_files/' % (pwd_gene_tree_itol_colorstrip_txt, ale_hgt_plot_dir))
    os.system('mv %s %s/annotation_files/' % (pwd_genome_tree_itol_colorstrip_txt, ale_hgt_plot_dir))
935
+
936
+
937
def ale_splitter(rec_file):
    """Split an ALE ``uml_rec`` file into four sibling files.

    The input is read by fixed line positions (0-based):

        line 2              species tree line (last tab field is written out)
        line 6              last whitespace-separated token is written as ``LL``
        line 8              tab fields 1.. are written as ``Dp``, ``Tp``, ``Lp``
        line 10             first token is the number ``n`` of reconciled trees
        lines 12 .. 12+n-1  the reconciled trees
        line 12+n+1         tab fields 1.. are written as ``De``, ``Te``, ``Le``, ``Se``
        lines 12+n+3 ..     the per-branch table

    Output files sit next to ``rec_file`` and are named by swapping the last
    ``uml_rec`` in its file name for ``stree``, ``info``, ``recs`` and
    ``rec_table``.

    Args:
        rec_file: path to the ``.uml_rec`` file.

    Raises:
        IndexError / ValueError: if the file is shorter than, or shaped
            differently from, the layout above. Nothing is written then.
    """

    def _sibling_path(new_suffix):
        # Only the file name is rewritten: a plain str.replace on the whole
        # path would also rename directories containing "uml_rec" and point
        # the output at a folder that does not exist.
        folder, file_name = os.path.split(rec_file)
        stem, marker, rest = file_name.rpartition("uml_rec")
        if not marker:
            # no "uml_rec" in the name: append the suffix rather than
            # overwrite the input file itself
            return "%s.%s" % (rec_file, new_suffix)
        return os.path.join(folder, stem + new_suffix + rest)

    with open(rec_file) as f:
        lines = f.readlines()

    # parse everything first so a malformed file leaves no partial output
    stree = lines[2].strip()
    ll = lines[6].strip().split()[-1]
    rates = lines[8].strip().split("\t")[1:]
    n_reconciled_trees = int(lines[10].strip().split()[0])
    trees_end = 12 + n_reconciled_trees
    reconciled_trees = lines[12:trees_end]
    n_of_events = lines[trees_end + 1].split("\t")[1:]
    table = lines[trees_end + 3:]

    info_rows = [("LL:", ll),
                 ("Dp:", rates[0]), ("Tp:", rates[1]), ("Lp:", rates[2]),
                 ("De:", n_of_events[0]), ("Te:", n_of_events[1]),
                 ("Le:", n_of_events[2]), ("Se:", n_of_events[3])]

    with open(_sibling_path("stree"), "w") as f:
        f.write(stree.split("\t")[-1])
    with open(_sibling_path("info"), "w") as f:
        for label, value in info_rows:
            f.write(label + "\t" + value + "\n")
    with open(_sibling_path("recs"), "w") as f:
        f.writelines(reconciled_trees)
    with open(_sibling_path("rec_table"), "w") as f:
        f.writelines(table)
971
+
972
+
973
def ALE3(args):
    """Parse ALE outputs for every orthogroup and plot the predicted HGTs.

    Every ``*.ufboot`` file found in the gene tree directory names one
    orthogroup (OG); ``parse_ale_op_worker`` is called once per OG, one after
    another.

    Args:
        args: dict with the keys
            'i1':    directory holding the gene tree ``.ufboot`` files,
            'i2':    ALE working directory (passed to the worker both as
                     its ``ale_wd`` and as its ``ale_op_dir``),
            'c':     tab-separated genome taxonomy file; column 1 is the
                     genome id, column 2 a ';'-separated taxon string,
            'color': phylum color file,
            'o':     output directory for the plots,
            'f':     True to delete an already existing output directory,
            'api':   API key passed on to the worker,
            'fc':    HGT frequency cutoff,
            'mld':   minimum leaf number of a donor node,
            'mlr':   minimum leaf number of a recipient node,
            'itol':  project name passed on to the worker.

    Exits the interpreter (``SystemExit``) when the output directory already
    exists and 'f' is not True.
    """
    import shutil  # local imports: only needed for directory handling below
    import sys

    gene_tree_dir = args['i1']
    ale_wd = args['i2']
    genome_taxon_txt = args['c']
    ar_phylum_color_code_txt = args['color']
    ale_hgt_plot_dir = args['o']
    force_create_op_dir = args['f']
    API_key = args['api']
    hgt_freq_cutoff = args['fc']
    donor_node_min_leaf_num = args['mld']
    recipient_node_min_leaf_num = args['mlr']
    project_name = args['itol']

    ignore_vertical_hgt = True          # filter ALE predicted HGTs
    ignore_leaf_hgt = True              # filter ALE predicted HGTs
    interal_node_prefix = 'IN'          # plot tree with HGT
    display_mode = '1'                  # plot tree with HGT, 1=rectangular, 2=circular, 3=unrooted
    d_color = '#FF0000'                 # plot tree with HGT
    r_color = '#0000FF'                 # plot tree with HGT
    dr_separator = '_to_'               # plot tree with HGT
    root_gene_tree_at_midpoint = True   # plot tree with HGT

    # one orthogroup per .ufboot file in the gene tree directory
    og_to_process_list = []
    for each_ufboot in glob.glob('%s/*.ufboot' % gene_tree_dir):
        _, ufboot_base, _ = sep_path_basename_ext(each_ufboot)
        og_to_process_list.append(ufboot_base)

    # read in genome taxonomy; the handle is closed as soon as parsing is done
    gnm_pco_dict = dict()
    with open(genome_taxon_txt) as genome_taxon_handle:
        for each_gnm in genome_taxon_handle:
            each_gnm = each_gnm.strip()
            if not each_gnm:
                continue  # tolerate blank lines instead of raising IndexError
            each_gnm_split = each_gnm.split('\t')
            gnm_id = each_gnm_split[0]
            taxon_ranks = each_gnm_split[1].split(';')
            gnm_phylum = taxon_ranks[1]
            gnm_class = taxon_ranks[2]
            gnm_order = taxon_ranks[3]
            # the first three characters of each rank are dropped
            # (presumably a rank prefix such as 'p__' -- confirm)
            gnm_pco_dict[gnm_id] = '%s__%s__%s__%s' % (gnm_phylum[3:], gnm_class[3:], gnm_order[3:], gnm_id)

    # (re)create the output directory without going through the shell, so
    # paths containing spaces or shell metacharacters are handled correctly
    if os.path.isdir(ale_hgt_plot_dir) is True:
        if force_create_op_dir is True:
            shutil.rmtree(ale_hgt_plot_dir)
        else:
            print('Output folder detected, program exited!')
            sys.exit()
    os.makedirs(os.path.join(ale_hgt_plot_dir, 'annotation_files'), exist_ok=True)

    # parse ALE output
    for n, qualified_og in enumerate(og_to_process_list, 1):

        print('%s (%s/%s): Parsing ALE outputs' % (qualified_og, n, len(og_to_process_list)))
        current_arg_list = [qualified_og, gene_tree_dir, ale_wd, ale_wd, ale_hgt_plot_dir, interal_node_prefix,
                            gnm_pco_dict, d_color, r_color, project_name, API_key, display_mode, hgt_freq_cutoff,
                            ignore_leaf_hgt, ignore_vertical_hgt, donor_node_min_leaf_num, recipient_node_min_leaf_num,
                            dr_separator, root_gene_tree_at_midpoint, ar_phylum_color_code_txt]
        parse_ale_op_worker(current_arg_list)

    print('Done!')
1040
+
1041
+
1042
def ALE4(args):
    """Placeholder step: accepts ``args``, does nothing and returns None."""
    return None
1044
+
1045
+
1046
if __name__ == '__main__':
    # Running this module directly is a no-op.
    pass

    # The commented-out parsers below are kept as a record of the option
    # names each step reads from its ``args`` dict.
    # NOTE(review): ALE3 also reads args['api'], args['fc'], args['mld'],
    # args['mlr'] and args['itol'], which the ALE3 parser below does not define.

    # ALE1_parser = argparse.ArgumentParser()
    # ALE1_parser.add_argument('-i', required=True, help='orthologous groups, either from orthofinder or oma')
    # ALE1_parser.add_argument('-s', required=True, help='sequence file, e.g., combined.faa')
    # ALE1_parser.add_argument('-p', required=True, help='orthologous identification program, orthofinder or oma')
    # ALE1_parser.add_argument('-m', required=False, type=int, default=50, help='min_og_genome_num, default: 50')
    # ALE1_parser.add_argument('-n', required=False, type=int, default=2, help='min_og_phylum_num, default: 2')
    # ALE1_parser.add_argument('-o', required=True, help='output dir, i.e., OMA working directory')
    # ALE1_parser.add_argument('-t', required=False, type=int, default=6, help='number of threads, default: 6')
    # ALE1_parser.add_argument('-jt', required=False, type=int, default=3, help='number of threads for job script, default: 3')
    # ALE1_parser.add_argument('-f', required=False, action="store_true", help='force overwrite')
    # ALE1_parser.add_argument('-c', required=True, help='genome_taxon_txt')
    # args = vars(ALE1_parser.parse_args())
    # ALE1(args)

    # ALE2_parser = argparse.ArgumentParser()
    # ALE2_parser.add_argument('-i', required=True, help='ALE1 output directory')
    # ALE2_parser.add_argument('-s', required=True, help='rooted species tree')
    # ALE2_parser.add_argument('-c', required=True, help='genome_taxon_txt')
    # ALE2_parser.add_argument('-o', required=True, help='output dir, i.e., OMA working directory')
    # ALE2_parser.add_argument('-f', required=False, action="store_true", help='force overwrite')
    # ALE2_parser.add_argument('-t', required=False, type=int, default=6, help='number of threads, default: 6')
    # args = vars(ALE2_parser.parse_args())
    # ALE2(args)

    # ALE3_parser = argparse.ArgumentParser()
    # ALE3_parser.add_argument('-i1', required=True, help='ALE1 output directory')
    # ALE3_parser.add_argument('-i2', required=True, help='ALE2 output directory')
    # ALE3_parser.add_argument('-c', required=True, help='genome_taxon_txt')
    # ALE3_parser.add_argument('-color', required=True, help='phylum_color_code.txt')
    # ALE3_parser.add_argument('-o', required=True, help='output dir, i.e., ALE3_op_dir')
    # ALE3_parser.add_argument('-f', required=False, action="store_true", help='force overwrite')
    # args = vars(ALE3_parser.parse_args())
    # ALE3(args)