treesak 1.53.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. TreeSAK/ALE.py +63 -0
  2. TreeSAK/ALE1.py +268 -0
  3. TreeSAK/ALE2.py +168 -0
  4. TreeSAK/ALE2RTC.py +30 -0
  5. TreeSAK/ALE3.py +205 -0
  6. TreeSAK/ALE4.py +636 -0
  7. TreeSAK/ALE5.py +210 -0
  8. TreeSAK/ALE6.py +401 -0
  9. TreeSAK/ALE7.py +126 -0
  10. TreeSAK/ALE_backup.py +1081 -0
  11. TreeSAK/AssessCVG.py +128 -0
  12. TreeSAK/AssessMarker.py +306 -0
  13. TreeSAK/AssessMarkerDeltaLL.py +257 -0
  14. TreeSAK/AssessMarkerPA.py +317 -0
  15. TreeSAK/AssessPB.py +113 -0
  16. TreeSAK/BMGE.jar +0 -0
  17. TreeSAK/BMGE.py +49 -0
  18. TreeSAK/C60SR4.nex +127 -0
  19. TreeSAK/CompareMCMC.py +138 -0
  20. TreeSAK/ConcateMSA.py +111 -0
  21. TreeSAK/ConvertMSA.py +135 -0
  22. TreeSAK/Dir.rb +82 -0
  23. TreeSAK/ExtractMarkerSeq.py +263 -0
  24. TreeSAK/FastRoot.py +1175 -0
  25. TreeSAK/FastRoot_backup.py +1122 -0
  26. TreeSAK/FigTree.py +34 -0
  27. TreeSAK/GTDB_tree.py +76 -0
  28. TreeSAK/GeneTree.py +142 -0
  29. TreeSAK/KEGG_Luo17.py +807 -0
  30. TreeSAK/LcaToLeaves.py +66 -0
  31. TreeSAK/MarkerRef2Tree.py +616 -0
  32. TreeSAK/MarkerRef2Tree_backup.py +628 -0
  33. TreeSAK/MarkerSeq2Tree.py +299 -0
  34. TreeSAK/MarkerSeq2Tree_backup.py +259 -0
  35. TreeSAK/ModifyTopo.py +116 -0
  36. TreeSAK/Newick_tree_plotter.py +79 -0
  37. TreeSAK/OMA.py +170 -0
  38. TreeSAK/OMA2.py +212 -0
  39. TreeSAK/OneLineAln.py +50 -0
  40. TreeSAK/PB.py +155 -0
  41. TreeSAK/PMSF.py +115 -0
  42. TreeSAK/PhyloBiAssoc.R +84 -0
  43. TreeSAK/PhyloBiAssoc.py +167 -0
  44. TreeSAK/PlotMCMC.py +41 -0
  45. TreeSAK/PlotMcmcNode.py +152 -0
  46. TreeSAK/PlotMcmcNode_old.py +252 -0
  47. TreeSAK/RootTree.py +101 -0
  48. TreeSAK/RootTreeGTDB.py +371 -0
  49. TreeSAK/RootTreeGTDB214.py +288 -0
  50. TreeSAK/RootTreeGTDB220.py +300 -0
  51. TreeSAK/SequentialDating.py +16 -0
  52. TreeSAK/SingleAleHGT.py +157 -0
  53. TreeSAK/SingleLinePhy.py +50 -0
  54. TreeSAK/SliceMSA.py +142 -0
  55. TreeSAK/SplitScore.py +21 -0
  56. TreeSAK/SplitScore1.py +177 -0
  57. TreeSAK/SplitScore1OMA.py +148 -0
  58. TreeSAK/SplitScore2.py +608 -0
  59. TreeSAK/TaxaCountStats.R +256 -0
  60. TreeSAK/TaxonTree.py +47 -0
  61. TreeSAK/TreeSAK_config.py +32 -0
  62. TreeSAK/VERSION +164 -0
  63. TreeSAK/VisHPD95.R +45 -0
  64. TreeSAK/VisHPD95.py +200 -0
  65. TreeSAK/__init__.py +0 -0
  66. TreeSAK/ale_parser.py +74 -0
  67. TreeSAK/ale_splitter.py +63 -0
  68. TreeSAK/alignment_pruner.pl +1471 -0
  69. TreeSAK/assessOG.py +45 -0
  70. TreeSAK/batch_itol.py +171 -0
  71. TreeSAK/catfasta2phy.py +140 -0
  72. TreeSAK/cogTree.py +185 -0
  73. TreeSAK/compare_trees.R +30 -0
  74. TreeSAK/compare_trees.py +255 -0
  75. TreeSAK/dating.py +264 -0
  76. TreeSAK/dating_ss.py +361 -0
  77. TreeSAK/deltall.py +82 -0
  78. TreeSAK/do_rrtc.rb +464 -0
  79. TreeSAK/fa2phy.py +42 -0
  80. TreeSAK/filter_rename_ar53.py +118 -0
  81. TreeSAK/format_leaf_name.py +70 -0
  82. TreeSAK/gap_stats.py +38 -0
  83. TreeSAK/get_SCG_tree.py +742 -0
  84. TreeSAK/get_arCOG_seq.py +97 -0
  85. TreeSAK/global_functions.py +222 -0
  86. TreeSAK/gnm_leaves.py +43 -0
  87. TreeSAK/iTOL.py +791 -0
  88. TreeSAK/iTOL_gene_tree.py +80 -0
  89. TreeSAK/itol_msa_stats.py +56 -0
  90. TreeSAK/keep_highest_rrtc.py +37 -0
  91. TreeSAK/koTree.py +194 -0
  92. TreeSAK/label_gene_tree_by_gnm.py +34 -0
  93. TreeSAK/label_tree.R +75 -0
  94. TreeSAK/label_tree.py +121 -0
  95. TreeSAK/mad.py +708 -0
  96. TreeSAK/mcmc2tree.py +58 -0
  97. TreeSAK/mcmcTC copy.py +92 -0
  98. TreeSAK/mcmcTC.py +104 -0
  99. TreeSAK/mcmctree_vs_reltime.R +44 -0
  100. TreeSAK/mcmctree_vs_reltime.py +252 -0
  101. TreeSAK/merge_pdf.py +32 -0
  102. TreeSAK/pRTC.py +56 -0
  103. TreeSAK/parse_mcmctree.py +198 -0
  104. TreeSAK/parse_reltime.py +141 -0
  105. TreeSAK/phy2fa.py +37 -0
  106. TreeSAK/plot_distruibution_th.py +165 -0
  107. TreeSAK/prep_mcmctree_ctl.py +92 -0
  108. TreeSAK/print_leaves.py +32 -0
  109. TreeSAK/pruneMSA.py +63 -0
  110. TreeSAK/recode.py +73 -0
  111. TreeSAK/remove_bias.R +112 -0
  112. TreeSAK/rename_leaves.py +78 -0
  113. TreeSAK/replace_clade.py +55 -0
  114. TreeSAK/root_with_out_group.py +84 -0
  115. TreeSAK/run_TaxaCountStats_R_s1.py +455 -0
  116. TreeSAK/subsample_drep_gnms.py +74 -0
  117. TreeSAK/subset.py +69 -0
  118. TreeSAK/subset_tree_stupid_old_way.py +193 -0
  119. TreeSAK/supertree.py +330 -0
  120. TreeSAK/tmp_1.py +19 -0
  121. TreeSAK/tmp_2.py +19 -0
  122. TreeSAK/tmp_3.py +120 -0
  123. TreeSAK/tmp_4.py +43 -0
  124. TreeSAK/tmp_5.py +12 -0
  125. TreeSAK/weighted_rand.rb +23 -0
  126. treesak-1.53.3.data/scripts/TreeSAK +955 -0
  127. treesak-1.53.3.dist-info/LICENSE +674 -0
  128. treesak-1.53.3.dist-info/METADATA +27 -0
  129. treesak-1.53.3.dist-info/RECORD +131 -0
  130. treesak-1.53.3.dist-info/WHEEL +5 -0
  131. treesak-1.53.3.dist-info/top_level.txt +1 -0
TreeSAK/SplitScore.py ADDED
@@ -0,0 +1,21 @@
1
+
2
# Overview help text for the SplitScore modules.
# The SplitScore1 examples use the options its parser defines (-i/-x/-o/-jst);
# the OrthologousGroups.txt/-s form belongs to SplitScore1OMA.
SplitScore_usage = '''
============================================= SplitScore example commands =============================================

# SplitScore modules
TreeSAK SplitScore1 -> Step 1: Infer gene tree
TreeSAK SplitScore1OMA -> Step 1: Infer gene tree (based on OMA outputs)
TreeSAK SplitScore2 -> Step 2: Calculate split score

# SplitScore1
TreeSAK SplitScore1 -i marker_seq -x fa -o step1_op_dir -jst 6 -f
TreeSAK SplitScore1 -i marker_seq -x fa -o step1_op_dir -jst 6 -f -u interested_gnm.txt

# SplitScore1OMA
TreeSAK SplitScore1OMA -i OrthologousGroups.txt -s OrthologousGroupsFasta -o step1_op_dir -jst 6 -f
TreeSAK SplitScore1OMA -i OrthologousGroups.txt -s OrthologousGroupsFasta -o step1_op_dir -jst 6 -f -u interested_gnm.txt

# SplitScore2
# Please ensure that all the commands produced in step one have been executed before proceeding to step two.
TreeSAK SplitScore2 -i step1_op_dir -g gnm_cluster.tsv -k gnm_taxon.txt -f -t 10 -o step_2_op_dir

# As described in the Undinarchaeota paper (Nina Dombrowski 2020, NC)

=======================================================================================================================
'''
TreeSAK/SplitScore1.py ADDED
@@ -0,0 +1,177 @@
1
+ from __future__ import print_function
2
+ import os
3
+ import glob
4
+ import argparse
5
+ from Bio import SeqIO
6
+
7
+
8
# Help text for the SplitScore1 module, assembled line by line.
SplitScore1_usage = (
    '\n'
    '======================== SplitScore1 example commands ========================\n'
    '\n'
    'TreeSAK SplitScore1 -i marker_seq -x fa -o SplitScore1_op_dir -jst 9 -f\n'
    '\n'
    '# As described in the Undinarchaeota paper (Nina Dombrowski 2020, NC)\n'
    '\n'
    '==============================================================================\n'
)
17
+
18
+
19
def sep_path_basename_ext(file_in):
    """Split a file path into its folder, base name and extension.

    Args:
        file_in (str): path to a file.

    Returns:
        tuple: (folder, base name, extension). The folder is '.' when
        file_in has no directory part; the extension keeps its leading dot.
    """
    folder, name_with_ext = os.path.split(file_in)
    base_name, extension = os.path.splitext(name_with_ext)
    return (folder if folder != '' else '.'), base_name, extension
25
+
26
+
27
def SplitScore1(args):
    """Prepare marker sequence files and the alignment/tree commands for SplitScore step 1.

    Reads every ``*.<x>`` FASTA file found in the input folder, keeps the
    sequences that belong to the genomes to process, drops markers whose
    genome coverage is below the cutoff and writes one
    ``mafft-einsi; BMGE; iqtree2`` command line per retained marker. The
    commands are only written to a file, they are not executed here.

    Args:
        args (dict): options, with the keys
            'i': folder holding one FASTA file per marker,
            'x': extension of these FASTA files (without the dot),
            'u': text file with one interested genome id per line, or None,
            'm': model passed to ``iqtree2 -m``,
            'c': minimum genome coverage (percent) for a marker to be kept,
            'f': True to remove an existing output folder first,
            'jst': thread number passed to mafft-einsi and iqtree2,
            'o': output folder,
            'seq_named_by_gnm': True if sequence ids are genome ids.

    Raises:
        SystemExit: via exit() when the interested genome file or the input
            FASTA files are missing, when a marker holds several sequences
            from the same genome, when no genome is left to process, or when
            the output folder exists and 'f' is not True.
    """
    # imported locally so that the file-level import block stays untouched
    import shutil

    oma_op_fasta = args['i']
    fasta_file_ext = args['x']
    interested_gnm_txt = args['u']
    iqtree_model = args['m']
    cov_cutoff = args['c']
    force_overwrite = args['f']
    num_of_js_threads = args['jst']
    op_dir = args['o']
    seq_named_by_gnm = args['seq_named_by_gnm']
    bmge_trim_model = 'BLOSUM30'
    bmge_entropy_score_cutoff = '0.55'

    def get_gnm_id(seq_id):
        # the genome id is the sequence id itself, or the sequence id without
        # its last '_'-separated field
        if seq_named_by_gnm is True:
            return seq_id
        return '_'.join(seq_id.split('_')[:-1])

    ################################################################################

    # read in the ids of interested genomes
    interested_gnm_set = set()
    if interested_gnm_txt is not None:
        if os.path.isfile(interested_gnm_txt) is False:
            print('%s not found, program exited' % interested_gnm_txt)
            exit()
        with open(interested_gnm_txt) as interested_gnm_txt_handle:
            for each_gnm in interested_gnm_txt_handle:
                gnm_id = each_gnm.strip()
                # a blank line must not register an empty genome id
                if gnm_id != '':
                    interested_gnm_set.add(gnm_id)

    ################################################################################

    # specify path to BMGE.jar (shipped next to this file)
    current_file_path = os.path.dirname(os.path.realpath(__file__))
    pwd_bmge_jar = '%s/BMGE.jar' % current_file_path

    fa_file_re = '%s/*.%s' % (oma_op_fasta, fasta_file_ext)
    fa_file_list = glob.glob(fa_file_re)
    if len(fa_file_list) == 0:
        print('No file found in %s, program exited!' % oma_op_fasta)
        exit()

    # marker id (file base name) -> set of sequence ids
    og_to_gene_dict = dict()
    for each_fa in fa_file_list:
        _, f_base, _ = sep_path_basename_ext(each_fa)
        og_to_gene_dict[f_base] = set(each_seq.id for each_seq in SeqIO.parse(each_fa, 'fasta'))

    ################################################################################

    gnm_to_process = set()
    for each_og in og_to_gene_dict:
        gene_set = og_to_gene_dict[each_og]
        gnm_set = set()
        for each_gene in gene_set:
            gnm_id = get_gnm_id(each_gene)
            gnm_set.add(gnm_id)
            if (interested_gnm_txt is None) or (gnm_id in interested_gnm_set):
                gnm_to_process.add(gnm_id)

        # fewer genomes than sequences: at least two sequences share a genome
        if len(gene_set) != len(gnm_set):
            print('%s contains more than one sequence from the same genome, program exited!' % each_og)
            exit()

    # stop before touching the output folder; the coverage below divides by this number
    if len(gnm_to_process) == 0:
        print('No genome to process, program exited!')
        exit()

    ################################################################################

    # define file name
    qualified_og_dir = '%s/qualified_OGs' % op_dir
    cmds_in_one_line_txt = '%s/cmds_mafft_bmge_iqtree.txt' % op_dir
    ignored_marker_txt = '%s/ignored_markers.txt' % op_dir

    # create output folder
    if os.path.isdir(op_dir) is True:
        if force_overwrite is True:
            shutil.rmtree(op_dir)
        else:
            print('%s exist, program exited!' % op_dir)
            exit()
    os.mkdir(op_dir)
    os.mkdir(qualified_og_dir)

    ################################################################################

    ignored_og_dict = dict()
    with open(cmds_in_one_line_txt, 'w') as cmds_in_one_line_txt_handle:
        for each_og in sorted(og_to_gene_dict):
            seq_file_in = '%s/%s.%s' % (oma_op_fasta, each_og, fasta_file_ext)
            file_out_seq = '%s/%s.%s' % (qualified_og_dir, each_og, fasta_file_ext)
            file_out_aln = '%s.aln' % each_og
            file_out_aln_trimmed = '%s_trimmed.aln' % each_og

            # keep the sequences from the genomes to process
            current_gnm_set = set()
            with open(file_out_seq, 'w') as seq_file_out_handle:
                for each_seq in SeqIO.parse(seq_file_in, 'fasta'):
                    gnm_id = get_gnm_id(each_seq.id)
                    if gnm_id in gnm_to_process:
                        current_gnm_set.add(gnm_id)
                        seq_file_out_handle.write('>%s\n' % each_seq.id)
                        seq_file_out_handle.write('%s\n' % each_seq.seq)

            # percentage of the genomes to process found in this marker
            cov_value = len(current_gnm_set) * 100.0 / len(gnm_to_process)
            cov_value = float("{0:.2f}".format(cov_value))

            if cov_value < cov_cutoff:
                report_str = 'Ignored %s, contains proteins from %s (%s%s) genomes, < %s%s.' % (each_og, len(current_gnm_set), cov_value, '%', cov_cutoff, '%')
                ignored_og_dict[each_og] = report_str
                os.remove(file_out_seq)
            else:
                # align, trim and iqtree; file names are relative, i.e. the
                # commands refer to files sitting in qualified_og_dir
                mafft_cmd = 'mafft-einsi --thread %s --quiet %s.%s > %s' % (num_of_js_threads, each_og, fasta_file_ext, file_out_aln)
                bmge_cmd = 'java -jar %s -i %s -m %s -t AA -h %s -of %s' % (pwd_bmge_jar, file_out_aln, bmge_trim_model, bmge_entropy_score_cutoff, file_out_aln_trimmed)
                iqtree_cmd = 'iqtree2 -s %s --seqtype AA -m %s -B 1000 --wbtl --bnni --prefix %s -T %s --quiet' % (file_out_aln_trimmed, iqtree_model, each_og, num_of_js_threads)
                # Undinarchaeota illuminate DPANN phylogeny and the impact of gene transfer on archaeal evolution, settings: -m LG+G -bb 1000 -wbtl -bnni
                cmds_in_one_line_txt_handle.write('%s; %s; %s\n' % (mafft_cmd, bmge_cmd, iqtree_cmd))

    # report ignored markers
    if len(ignored_og_dict) > 0:
        ignored_og_list = sorted(ignored_og_dict)
        print('The following %s markers were ignored due to low genome coverage, see details in %s:' % (len(ignored_og_dict), ignored_marker_txt))
        print('\n'.join(ignored_og_list))
        with open(ignored_marker_txt, 'w') as ignored_marker_txt_handle:
            for each_ignored_marker in ignored_og_list:
                ignored_marker_txt_handle.write(ignored_og_dict[each_ignored_marker] + '\n')

    # report
    print('You will need to execute the commands exported to the following file before moving to SplitScore2')
    print(cmds_in_one_line_txt)
    print('Done!')
162
+
163
+
164
if __name__ == '__main__':

    # command line options of SplitScore1, registered in this order
    option_specs = [
        ('-i', dict(required=True, help='orthologous gene sequence')),
        ('-x', dict(required=True, help='fasta file extension')),
        ('-o', dict(required=True, help='output directory')),
        ('-u', dict(required=False, default=None, help='interested genomes, no file extension')),
        ('-m', dict(required=False, default='LG+G', help='iqtree_model, default: LG+G')),
        ('-c', dict(required=False, type=int, default=75, help='coverage cutoff, default: 75')),
        ('-f', dict(required=False, action="store_true", help='force overwrite')),
        ('-seq_named_by_gnm', dict(required=False, action="store_true", help='named_by_gnm, specify if sequence named by gnm')),
        ('-jst', dict(required=False, type=int, default=1, help='num of threads for iqtree2, default: 1')),
    ]

    cli_parser = argparse.ArgumentParser()
    for option_flag, option_kwargs in option_specs:
        cli_parser.add_argument(option_flag, **option_kwargs)

    # hand the parsed options over as a plain dict
    SplitScore1(vars(cli_parser.parse_args()))
TreeSAK/SplitScore1OMA.py ADDED
@@ -0,0 +1,148 @@
1
+ from __future__ import print_function
2
+ import os
3
+ import argparse
4
+ from Bio import SeqIO
5
+
6
+
7
# Help text for the SplitScore1OMA module.
# The thread option is -jst, the only thread option its parser defines.
SplitScore1OMA_usage = '''
======================== SplitScore1OMA example commands ========================

# SplitScore1OMA
TreeSAK SplitScore1OMA -i OrthologousGroups.txt -s OrthologousGroupsFasta -o step1_op_dir -jst 6 -f
TreeSAK SplitScore1OMA -i OrthologousGroups.txt -s OrthologousGroupsFasta -o step1_op_dir -jst 6 -f -u interested_gnm.txt
# Please ensure that all the commands in iqtree_cmds.txt have been executed before proceeding to step 2.

=================================================================================
'''
17
+
18
+
19
def select_seq(seq_file, seq_id_list, output_file):
    """Write the sequences whose id is in seq_id_list to a new FASTA file.

    Each selected record is written as a '>id' header line (the id only)
    followed by its sequence on a single line, in input order.

    Args:
        seq_file (str): input FASTA file.
        seq_id_list (iterable): ids of the sequences to keep.
        output_file (str): FASTA file to write, overwritten if it exists.
    """
    # a set makes the per-record membership test independent of the list length
    seq_id_set = set(seq_id_list)

    # the with block closes the output file even if parsing fails midway
    with open(output_file, 'w') as output_file_handle:
        for seq_record in SeqIO.parse(seq_file, 'fasta'):
            seq_id = seq_record.id
            if seq_id in seq_id_set:
                output_file_handle.write('>%s\n' % seq_id)
                output_file_handle.write('%s\n' % str(seq_record.seq))
27
+
28
+
29
def get_gene_tree(oma_op_txt, oma_op_fasta, interested_gnm_txt, cov_cutoff, oma_op_fasta_qualified, iqtree_model, num_of_js_threads, force_overwrite, get_gene_tree_cmd_txt):
    """Select OMA orthologous groups by genome coverage and write the gene tree commands.

    For every group whose coverage across the interested genomes is at least
    cov_cutoff, the group's sequences are placed in oma_op_fasta_qualified and
    one ``mafft-einsi; trimal; iqtree2`` command line is written to
    get_gene_tree_cmd_txt. The commands are only written, not executed, and
    use file names relative to oma_op_fasta_qualified.

    Args:
        oma_op_txt (str): groups file; lines starting with '#' are skipped,
            other lines are tab separated: group id, then one
            '<prefix>:<gene id> ...' entry per gene.
        oma_op_fasta (str): folder holding the per-group 'OG<number>.fa' files.
        interested_gnm_txt (str or None): file with one genome id per line;
            None means all genomes found in oma_op_txt.
        cov_cutoff (int or float): minimum coverage, in percent.
        oma_op_fasta_qualified (str): output folder for the qualified groups.
        iqtree_model (str): model passed to ``iqtree2 -m``.
        num_of_js_threads (int): thread number passed to mafft-einsi and iqtree2.
        force_overwrite (bool): True to remove an existing output folder first.
        get_gene_tree_cmd_txt (str): file receiving the command lines.

    Raises:
        SystemExit: via exit() when there is no genome to consider, when the
            output folder exists with content and force_overwrite is not
            True, or when the FASTA file of a qualified group is missing.
    """
    # imported locally so that the file-level import block stays untouched
    import shutil

    # parse the groups once: [(group id, [gene id, ...]), ...]
    group_gene_list = []
    genome_id_set = set()
    with open(oma_op_txt) as oma_op_txt_handle:
        for each_group in oma_op_txt_handle:
            if each_group.startswith('#') or (each_group.strip() == ''):
                continue
            each_group_split = each_group.strip().split('\t')
            current_gene_list = [i.split(':')[1].split(' ')[0] for i in each_group_split[1:]]
            group_gene_list.append((each_group_split[0], current_gene_list))
            for each_gene in current_gene_list:
                # genome id: gene id without its last '_'-separated field
                genome_id_set.add('_'.join(each_gene.split('_')[:-1]))

    if interested_gnm_txt is not None:
        with open(interested_gnm_txt) as interested_gnm_txt_handle:
            interested_gnm_set = set(i.strip() for i in interested_gnm_txt_handle if i.strip() != '')
    else:
        interested_gnm_set = genome_id_set

    # the coverage below divides by the number of interested genomes
    if len(interested_gnm_set) == 0:
        print('No genome to process, program exited!')
        exit()

    # create output folder; an existing but empty folder (e.g. one the caller
    # has just created) is reused instead of being treated as a conflict
    if os.path.isdir(oma_op_fasta_qualified) is True:
        if force_overwrite is True:
            shutil.rmtree(oma_op_fasta_qualified)
            os.mkdir(oma_op_fasta_qualified)
        elif len(os.listdir(oma_op_fasta_qualified)) > 0:
            print('%s already exist, program exited!' % oma_op_fasta_qualified)
            exit()
    else:
        os.mkdir(oma_op_fasta_qualified)

    # filter OMA output
    qualified_grp_to_gene_dict = dict()
    for group_id, current_gene_list in group_gene_list:
        current_gene_list_interested = []
        for gene in current_gene_list:
            gnm = '_'.join(gene.split('_')[:-1])
            if gnm in interested_gnm_set:
                current_gene_list_interested.append(gene)

        current_cov = len(current_gene_list_interested) * 100.0 / len(interested_gnm_set)
        if current_cov >= cov_cutoff:
            qualified_grp_to_gene_dict[group_id] = current_gene_list_interested

    print('The number of orthologous groups with coverage >= %s is %s.' % (cov_cutoff, len(qualified_grp_to_gene_dict)))

    # whole files can be copied only if every genome is an interested one;
    # comparing the sets (not just their sizes) avoids copying unwanted genomes
    copy_whole_file = (interested_gnm_set == genome_id_set)

    # prepare commands for getting gene tree
    with open(get_gene_tree_cmd_txt, 'w') as get_gene_tree_cmd_txt_handle:
        for qualified_grp in sorted(qualified_grp_to_gene_dict):
            # OMA00012 -> 12; lstrip also copes with an id made of zeros only
            group_id_only_num = qualified_grp.replace('OMA', '').lstrip('0')

            # define file name
            og_id = 'OG%s' % group_id_only_num
            pwd_seq_file_in = '%s/%s.fa' % (oma_op_fasta, og_id)
            pwd_og_seq = '%s/%s.fa' % (oma_op_fasta_qualified, og_id)
            pwd_og_aln = '%s/%s.aln' % (oma_op_fasta_qualified, og_id)
            pwd_og_aln_trimmed = '%s/%s_trimmed.aln' % (oma_op_fasta_qualified, og_id)

            if os.path.isfile(pwd_seq_file_in) is False:
                print('%s not found, program exited!' % pwd_seq_file_in)
                exit()

            # get sequence
            if copy_whole_file is True:
                shutil.copy(pwd_seq_file_in, pwd_og_seq)
            else:
                select_seq(pwd_seq_file_in, qualified_grp_to_gene_dict[qualified_grp], pwd_og_seq)

            # align, trim and iqtree
            mafft_cmd = 'mafft-einsi --thread %s --quiet %s > %s' % (num_of_js_threads, pwd_og_seq, pwd_og_aln)
            trimal_cmd = 'trimal -in %s -out %s -automated1' % (pwd_og_aln, pwd_og_aln_trimmed)
            iqtree_cmd = 'iqtree2 -s %s --seqtype AA -m %s -T %s -B 1000 --quiet --wbtl --prefix %s/%s' % (pwd_og_aln_trimmed, iqtree_model, num_of_js_threads, oma_op_fasta_qualified, og_id)
            cmds_one_line = '%s; %s; %s' % (mafft_cmd, trimal_cmd, iqtree_cmd)
            # drop the folder prefix so that the commands use relative file names
            get_gene_tree_cmd_txt_handle.write(cmds_one_line.replace((oma_op_fasta_qualified + '/'), '') + '\n')
105
+
106
+
107
def SplitScore1OMA(args):
    """Run SplitScore step 1 on OMA outputs.

    Creates the output folder and delegates to get_gene_tree(), which selects
    the qualified orthologous groups into '<o>/qualified_OGs' and writes the
    gene tree commands to '<o>/iqtree_cmds.txt'.

    Args:
        args (dict): options, with the keys
            'i': groups file produced by OMA,
            's': folder with the per-group FASTA files produced by OMA,
            'u': file with the ids of interested genomes, or None,
            'm': model passed to ``iqtree2 -m``,
            'c': minimum genome coverage, in percent,
            'f': True to remove an existing output folder first,
            'jst': thread number used in the generated commands,
            'o': output folder.

    Raises:
        SystemExit: via exit() when the output folder exists and 'f' is not True.
    """
    # imported locally so that the file-level import block stays untouched
    import shutil

    oma_op_txt = args['i']
    oma_op_fasta = args['s']
    interested_gnm_txt = args['u']
    iqtree_model = args['m']
    cov_cutoff = args['c']
    force_overwrite = args['f']
    num_of_js_threads = args['jst']
    step_1_op_dir = args['o']

    # define file name
    qualified_og_dir = '%s/qualified_OGs' % step_1_op_dir
    iqtree_cmds_txt = '%s/iqtree_cmds.txt' % step_1_op_dir

    # create output folder
    if os.path.isdir(step_1_op_dir) is True:
        if force_overwrite is True:
            shutil.rmtree(step_1_op_dir)
        else:
            print('%s exist, program exited!' % step_1_op_dir)
            exit()
    os.mkdir(step_1_op_dir)

    # qualified_og_dir is not created here: get_gene_tree() creates it, and
    # pre-creating it made runs without -f stop with an 'already exist' message

    # get get_gene_tree
    get_gene_tree(oma_op_txt, oma_op_fasta, interested_gnm_txt, cov_cutoff, qualified_og_dir, iqtree_model, num_of_js_threads, force_overwrite, iqtree_cmds_txt)
134
+
135
+
136
if __name__ == '__main__':

    # command line options of SplitScore1OMA, registered in this order
    option_specs = [
        ('-i', dict(required=True, help='OrthologousGroups.txt, produced by OMA')),
        ('-s', dict(required=True, help='OrthologousGroupsFasta, produced by OMA')),
        ('-u', dict(required=False, default=None, help='ID of interested genomes, no file extension')),
        ('-o', dict(required=True, help='output directory')),
        ('-m', dict(required=False, default='LG+G+I', help='iqtree_model, default: LG+G+I')),
        ('-c', dict(required=False, type=int, default=80, help='coverage cutoff, default: 80')),
        ('-f', dict(required=False, action="store_true", help='force overwrite')),
        ('-jst', dict(required=False, type=int, default=1, help='num of threads for inferring gene tree, default: 1')),
    ]

    cli_parser = argparse.ArgumentParser()
    for option_flag, option_kwargs in option_specs:
        cli_parser.add_argument(option_flag, **option_kwargs)

    # hand the parsed options over as a plain dict
    SplitScore1OMA(vars(cli_parser.parse_args()))